Copy-paste-ready R and Python code for NFL analytics, from data loading to machine learning models.
Load NFL play-by-play data and related nflverse datasets using nflfastR in R and nfl_data_py in Python.
# Install and load nflfastR
# install.packages("nflfastR")
library(nflfastR)
library(tidyverse)
# Load single season
pbp_2023 <- load_pbp(2023)
# Load multiple seasons
pbp_multi <- load_pbp(2021:2023)
# View structure
glimpse(pbp_2023)
# Quick summary
cat("Total plays:", nrow(pbp_2023), "\n")
cat("Columns:", ncol(pbp_2023), "\n")
# Install: pip install nfl_data_py
import nfl_data_py as nfl
import pandas as pd
# Load single season
pbp_2023 = nfl.import_pbp_data([2023])
# Load multiple seasons
pbp_multi = nfl.import_pbp_data([2021, 2022, 2023])
# View structure
print(pbp_2023.info())
# Quick summary
print(f"Total plays: {len(pbp_2023)}")
print(f"Columns: {len(pbp_2023.columns)}")
library(nflfastR)
library(tidyverse)
# Load current rosters
rosters <- fast_scraper_roster(2023)
# Load historical rosters
rosters_multi <- fast_scraper_roster(2020:2023)
# Filter to specific position
qbs <- rosters %>%
filter(position == "QB") %>%
select(full_name, team, age, years_exp, draft_number)
head(qbs)
import nfl_data_py as nfl
import pandas as pd
# Load current rosters
rosters = nfl.import_rosters([2023])
# Load historical rosters
rosters_multi = nfl.import_rosters([2020, 2021, 2022, 2023])
# Filter to specific position
qbs = rosters[rosters["position"] == "QB"][
["player_name", "team", "age", "years_exp", "draft_number"]
]
print(qbs.head())
library(nflfastR)
library(tidyverse)
# Load Next Gen Stats - Passing
ngs_passing <- load_nextgen_stats(
seasons = 2023,
stat_type = "passing"
)
# Load Next Gen Stats - Rushing
ngs_rushing <- load_nextgen_stats(
seasons = 2023,
stat_type = "rushing"
)
# Load Next Gen Stats - Receiving
ngs_receiving <- load_nextgen_stats(
seasons = 2023,
stat_type = "receiving"
)
# View top passers by avg air yards
ngs_passing %>%
filter(week == 0) %>% # Season totals
arrange(desc(avg_air_yards_to_sticks)) %>%
select(player_display_name, team_abbr, avg_air_yards_to_sticks,
completion_percentage_above_expectation) %>%
head(10)
import nfl_data_py as nfl
import pandas as pd
# Load Next Gen Stats - Passing
ngs_passing = nfl.import_ngs_data(
stat_type="passing",
years=[2023]
)
# Load Next Gen Stats - Rushing
ngs_rushing = nfl.import_ngs_data(
stat_type="rushing",
years=[2023]
)
# Load Next Gen Stats - Receiving
ngs_receiving = nfl.import_ngs_data(
stat_type="receiving",
years=[2023]
)
# View top passers by avg air yards (season totals)
season_totals = ngs_passing[ngs_passing["week"] == 0]
top_passers = season_totals.nlargest(10, "avg_air_yards_to_sticks")[
["player_display_name", "team_abbr", "avg_air_yards_to_sticks",
"completion_percentage_above_expectation"]
]
print(top_passers)
library(nflfastR)
library(tidyverse)
# Load combine data
combine <- load_combine()
# Filter recent years
combine_recent <- combine %>%
filter(draft_year >= 2020)
# Top 40-yard dash times by position
combine_recent %>%
filter(!is.na(forty)) %>%
group_by(pos) %>%
slice_min(forty, n = 3) %>%
select(draft_year, player_name, pos, forty, vertical, broad_jump) %>%
arrange(pos, forty)
import nfl_data_py as nfl
import pandas as pd
# Load combine data
combine = nfl.import_combine_data()
# Filter recent years
combine_recent = combine[combine["draft_year"] >= 2020]
# Top 40-yard dash times by position
top_forty = (combine_recent[combine_recent["forty"].notna()]
.groupby("pos")
.apply(lambda x: x.nsmallest(3, "forty"))
.reset_index(drop=True)
[["draft_year", "player_name", "pos", "forty", "vertical", "broad_jump"]]
)
print(top_forty)
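In recent pandas versions the groupby().apply() pattern above can raise a deprecation warning about operating on the grouping columns; an equivalent result (up to tie-breaking) that avoids apply is to sort first and keep the head of each group:
# Same idea without groupby().apply(): sort by forty, then keep the
# three fastest rows per position
top_forty_alt = (combine_recent[combine_recent["forty"].notna()]
                 .sort_values("forty")
                 .groupby("pos")
                 .head(3)
                 [["draft_year", "player_name", "pos", "forty", "vertical", "broad_jump"]]
                 .sort_values(["pos", "forty"]))
print(top_forty_alt)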
library(nflfastR)
library(tidyverse)
# Load schedules with results
schedules <- load_schedules(2020:2023)
# Filter to completed games
completed <- schedules %>%
filter(!is.na(result))
# Calculate home win percentage
home_win_pct <- completed %>%
summarize(
home_wins = sum(result > 0),
away_wins = sum(result < 0),
ties = sum(result == 0),
home_win_pct = home_wins / n()
)
print(home_win_pct)
# Spread analysis
schedules %>%
filter(!is.na(spread_line), !is.na(result)) %>%
mutate(
# spread_line is positive when the home team is favored (nflverse convention),
# so the home team covers when it wins by more than the spread
home_covered = result > spread_line,
away_covered = result < spread_line
) %>%
summarize(
home_cover_rate = mean(home_covered, na.rm = TRUE),
avg_spread = mean(spread_line),
avg_margin = mean(abs(result))
)
import nfl_data_py as nfl
import pandas as pd
# Load schedules with results
schedules = nfl.import_schedules([2020, 2021, 2022, 2023])
# Filter to completed games
completed = schedules[schedules["result"].notna()]
# Calculate home win percentage
home_wins = (completed["result"] > 0).sum()
away_wins = (completed["result"] < 0).sum()
ties = (completed["result"] == 0).sum()
home_win_pct = home_wins / len(completed)
print(f"Home Win %: {home_win_pct:.1%}")
# Spread analysis
with_spread = completed[completed["spread_line"].notna()].copy()
# spread_line is positive when the home team is favored, so the home team
# covers when the final margin beats the spread
with_spread["home_covered"] = with_spread["result"] > with_spread["spread_line"]
print(f"Home Cover Rate: {with_spread['home_covered'].mean():.1%}")
print(f"Avg Spread: {with_spread['spread_line'].mean():.1f}")
print(f"Avg Margin: {with_spread['result'].abs().mean():.1f}")
library(nflfastR)
library(tidyverse)
# Load participation data
participation <- load_participation(2023)
# View structure
glimpse(participation)
# Analyze snap counts by player
snap_counts <- participation %>%
separate_rows(offense_players, sep = ";") %>%
filter(offense_players != "") %>%
group_by(offense_players) %>%
summarize(
snaps = n(),
games = n_distinct(game_id)
) %>%
arrange(desc(snaps))
head(snap_counts, 20)
import nfl_data_py as nfl
import pandas as pd
# Load participation data
participation = nfl.import_participation([2023])
# View structure
print(participation.info())
# Count offensive snaps (simplified)
# Note: participation data structure varies
print(f"Total plays with participation: {len(participation)}")
print(participation.head())
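To mirror the R snap-count logic, the semicolon-separated offense_players field can be exploded into one row per player. A rough sketch, assuming the Python loader exposes the same offense_players column as the R release:
# Per-player offensive snap counts: split and explode the semicolon-separated
# offense_players field (assumes the same column layout as the R data)
off = participation[participation["offense_players"].notna()].copy()
off["player_id"] = off["offense_players"].str.split(";")
snap_counts = (off.explode("player_id")
               .query("player_id != ''")
               .groupby("player_id")
               .size()
               .sort_values(ascending=False)
               .rename("snaps"))
print(snap_counts.head(20))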
library(nflfastR)
library(tidyverse)
# Load injuries data
injuries <- load_injuries(2023)
# View structure
glimpse(injuries)
# Count injuries by team and status
injury_summary <- injuries %>%
group_by(team, report_status) %>%
summarize(players = n(), .groups = "drop") %>%
pivot_wider(names_from = report_status, values_from = players, values_fill = 0)
print(injury_summary)
# Find most common injury types
injuries %>%
filter(!is.na(report_primary_injury)) %>%
count(report_primary_injury, sort = TRUE) %>%
head(10)
import nfl_data_py as nfl
import pandas as pd
# Load injuries data
injuries = nfl.import_injuries([2023])
# View structure
print(injuries.info())
# Count injuries by status
if "report_status" in injuries.columns:
status_counts = injuries["report_status"].value_counts()
print("Injury Status Distribution:")
print(status_counts)
# Most common injuries
if "report_primary_injury" in injuries.columns:
injury_types = injuries["report_primary_injury"].value_counts().head(10)
print("\nMost Common Injuries:")
print(injury_types)
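To reproduce the R team-by-status summary, a crosstab is a compact option (again guarded in case the columns are absent):
# Team x report_status table, mirroring the R pivot_wider summary
if {"team", "report_status"}.issubset(injuries.columns):
    injury_summary = pd.crosstab(injuries["team"], injuries["report_status"])
    print(injury_summary)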
library(nflfastR)
library(tidyverse)
# Load draft picks
draft <- load_draft_picks()
# Filter to recent years
recent_draft <- draft %>%
filter(season >= 2020)
# Analyze picks by position
position_analysis <- recent_draft %>%
group_by(season, position) %>%
summarize(
picks = n(),
avg_pick = mean(pick),
.groups = "drop"
)
# First round by position
first_round <- recent_draft %>%
filter(round == 1) %>%
count(position, sort = TRUE)
print(first_round)
# Top 10 picks each year
recent_draft %>%
filter(pick <= 10) %>%
select(season, pick, team, pfr_player_name, position) %>%
arrange(season, pick)
import nfl_data_py as nfl
import pandas as pd
# Load draft picks
draft = nfl.import_draft_picks()
# Filter to recent years
recent_draft = draft[draft["season"] >= 2020]
# First round picks by position
first_round = recent_draft[recent_draft["round"] == 1]
position_counts = first_round["position"].value_counts()
print("First Round Picks by Position (2020+):")
print(position_counts)
# Top 10 picks
top_picks = recent_draft[recent_draft["pick"] <= 10][
["season", "pick", "team", "pfr_player_name", "position"]
].sort_values(["season", "pick"])
print("\nTop 10 Picks:")
print(top_picks)
library(nflfastR)
library(tidyverse)
# Load snap counts
snaps <- load_snap_counts(2023)
# View structure
glimpse(snaps)
# Top offensive snap leaders
off_snaps <- snaps %>%
group_by(player, position, team) %>%
summarize(
games = n(),
total_off_snaps = sum(offense_snaps, na.rm = TRUE),
avg_off_snaps = mean(offense_snaps, na.rm = TRUE),
avg_off_pct = mean(offense_pct, na.rm = TRUE),
.groups = "drop"
) %>%
filter(games >= 10) %>%
arrange(desc(total_off_snaps))
# Top snap share by position
snaps %>%
filter(position %in% c("QB", "RB", "WR", "TE")) %>%
group_by(position) %>%
slice_max(offense_pct, n = 5) %>%
select(player, team, position, week, offense_snaps, offense_pct)
import nfl_data_py as nfl
import pandas as pd
# Load snap counts
snaps = nfl.import_snap_counts([2023])
# View structure
print(snaps.info())
# Top offensive snap leaders
off_snaps = (snaps.groupby(["player", "position", "team"])
.agg(
games=("week", "count"),
total_off_snaps=("offense_snaps", "sum"),
avg_off_snaps=("offense_snaps", "mean"),
avg_off_pct=("offense_pct", "mean")
)
.reset_index())
off_snaps = off_snaps[off_snaps["games"] >= 10].sort_values(
"total_off_snaps", ascending=False)
print("Top Offensive Snap Leaders:")
print(off_snaps.head(20))
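The R version also pulls the top single-game snap shares by position; a pandas equivalent is a sort followed by the head of each position group:
# Top single-game offensive snap share by position (QB/RB/WR/TE)
skill = snaps[snaps["position"].isin(["QB", "RB", "WR", "TE"])]
top_share = (skill.sort_values("offense_pct", ascending=False)
             .groupby("position")
             .head(5)
             [["player", "team", "position", "week", "offense_snaps", "offense_pct"]])
print(top_share)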
library(nflfastR)
library(tidyverse)
# Load schedules for win totals context
schedules <- load_schedules(2023)
# Calculate actual wins per team
actual_wins <- schedules %>%
filter(!is.na(result)) %>%
mutate(
home_win = result > 0,
away_win = result < 0
) %>%
pivot_longer(
cols = c(home_team, away_team),
names_to = "location",
values_to = "team"
) %>%
mutate(
win = case_when(
location == "home_team" & result > 0 ~ 1,
location == "away_team" & result < 0 ~ 1,
result == 0 ~ 0.5,
TRUE ~ 0
)
) %>%
group_by(team) %>%
summarize(
games = n(),
wins = sum(win),
losses = games - wins
) %>%
arrange(desc(wins))
print(actual_wins)
import nfl_data_py as nfl
import pandas as pd
# Load schedules
schedules = nfl.import_schedules([2023])
# Filter to completed games
completed = schedules[schedules["result"].notna()]
# Calculate wins for home teams
home_wins = completed.groupby("home_team").apply(
lambda x: (x["result"] > 0).sum() + (x["result"] == 0).sum() * 0.5
).reset_index(name="home_wins")
# Calculate wins for away teams
away_wins = completed.groupby("away_team").apply(
lambda x: (x["result"] < 0).sum() + (x["result"] == 0).sum() * 0.5
).reset_index(name="away_wins")
# Combine
wins = home_wins.merge(away_wins, left_on="home_team", right_on="away_team")
wins["total_wins"] = wins["home_wins"] + wins["away_wins"]
wins = wins[["home_team", "total_wins"]].rename(columns={"home_team": "team"})
wins = wins.sort_values("total_wins", ascending=False)
print("Team Win Totals:")
print(wins)
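An alternative that mirrors the R pivot_longer approach is to stack the home and away perspectives before aggregating, which avoids relying on the merge of two separate tables:
# Stack home and away rows into one long frame, then sum wins per team
# (ties count as half a win, matching the R version)
home = completed[["home_team", "result"]].rename(columns={"home_team": "team"})
home["win"] = (home["result"] > 0) + (home["result"] == 0) * 0.5
away = completed[["away_team", "result"]].rename(columns={"away_team": "team"})
away["win"] = (away["result"] < 0) + (away["result"] == 0) * 0.5
long_frame = pd.concat([home, away], ignore_index=True)
wins_alt = long_frame.groupby("team")["win"].sum().sort_values(ascending=False)
print(wins_alt)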
library(nflfastR)
library(tidyverse)
# Load multiple data sources
pbp <- load_pbp(2023)
rosters <- fast_scraper_roster(2023)
# Get passer stats with roster info
passer_stats <- pbp %>%
filter(!is.na(passer_player_id)) %>%
group_by(passer_player_id, passer_player_name) %>%
summarize(
attempts = n(),
completions = sum(complete_pass),
yards = sum(passing_yards, na.rm = TRUE),
tds = sum(pass_touchdown),
ints = sum(interception),
epa = sum(epa, na.rm = TRUE),
.groups = "drop"
)
# Merge with roster data
passer_with_roster <- passer_stats %>%
left_join(
rosters %>% select(gsis_id, full_name, team, age, height, weight, college),
by = c("passer_player_id" = "gsis_id")
)
# View combined data
passer_with_roster %>%
filter(attempts >= 100) %>%
select(passer_player_name, team, age, college, attempts, yards, tds, epa) %>%
arrange(desc(epa))
import nfl_data_py as nfl
import pandas as pd
# Load multiple data sources
pbp = nfl.import_pbp_data([2023])
rosters = nfl.import_rosters([2023])
# Get passer stats
passer_stats = (pbp[pbp["passer_player_id"].notna()]
.groupby(["passer_player_id", "passer_player_name"])
.agg(
attempts=("play_id", "count"),
completions=("complete_pass", "sum"),
yards=("passing_yards", "sum"),
tds=("pass_touchdown", "sum"),
ints=("interception", "sum"),
epa=("epa", "sum")
)
.reset_index())
# Merge with roster data
passer_with_roster = passer_stats.merge(
rosters[["gsis_id", "player_name", "team", "age", "college"]],
left_on="passer_player_id",
right_on="gsis_id",
how="left"
)
# View combined data
result = (passer_with_roster[passer_with_roster["attempts"] >= 100]
.sort_values("epa", ascending=False)
[["passer_player_name", "team", "age", "college", "attempts", "yards", "tds", "epa"]])
print(result)
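A common follow-up is to convert the totals into rates before ranking; a small sketch built on the merged frame above:
# Rate stats from the aggregated totals
# Note: "attempts" counts every play with a passer_player_id (including sacks
# and scrambles), so these are dropback rates rather than true per-attempt rates
qualified = passer_with_roster[passer_with_roster["attempts"] >= 100].copy()
qualified["epa_per_play"] = qualified["epa"] / qualified["attempts"]
qualified["comp_pct"] = qualified["completions"] / qualified["attempts"]
print(qualified.sort_values("epa_per_play", ascending=False)[
    ["passer_player_name", "team", "attempts", "comp_pct", "epa_per_play"]
].head(10))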
library(nflfastR)
library(tidyverse)
# Load QBR data
qbr <- load_espn_qbr(
league = "nfl",
seasons = 2023,
summary_type = "season"
)
# View structure
glimpse(qbr)
# Top QBs by Total QBR
qbr %>%
arrange(desc(qbr_total)) %>%
select(name_display, team_abb, qbr_total, pts_added, pass_rating) %>%
head(15)
# Weekly QBR trends
qbr_weekly <- load_espn_qbr(
league = "nfl",
seasons = 2023,
summary_type = "week"
)
# Best single-game performances
qbr_weekly %>%
arrange(desc(qbr_total)) %>%
select(name_display, week, team_abb, qbr_total, pts_added) %>%
head(10)
import nfl_data_py as nfl
import pandas as pd
# Load QBR data
qbr = nfl.import_qbr([2023])
# View structure
print(qbr.info())
# Top QBs by QBR (if available)
if "qbr_total" in qbr.columns:
top_qbr = qbr.nlargest(15, "qbr_total")[
["name_display", "team_abb", "qbr_total", "pts_added", "pass_rating"]
]
print("Top QBs by Total QBR:")
print(top_qbr)
else:
print("Available columns:", qbr.columns.tolist())
print(qbr.head())
nflfastR - Play-by-play data with EPA
nflplotR - NFL team logos & plotting
tidyverse - Data manipulation & visualization
ggplot2 - Advanced visualizations
nfl_data_py - NFL data (nflverse compatible)
pandas - Data manipulation
matplotlib - Visualizations
scikit-learn - Machine learning
Learn the theory behind these techniques in our comprehensive tutorial series.