Copy-paste-ready R and Python code for NFL analytics, from data loading to machine learning models.
Deep dive into quarterback and passing game analytics
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Air-yards profile per quarterback.
# Only plays with both a passer id and an air_yards value are counted.
qb_air_yards <- pbp %>%
  filter(!is.na(passer_player_id) & !is.na(air_yards)) %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarise(
    attempts = n(),
    total_air_yards = sum(air_yards),
    avg_air_yards = mean(air_yards),
    # Percentage of attempts thrown 20+ air yards / under 5 air yards
    deep_pct = 100 * mean(air_yards >= 20),
    short_pct = 100 * mean(air_yards < 5),
    .groups = "drop"
  ) %>%
  # Keep passers with at least 200 counted attempts, deepest average first
  filter(attempts >= 200) %>%
  arrange(desc(avg_air_yards))

print(qb_air_yards)

# Average air yards split by whether the pass was completed
pbp %>%
  filter(!is.na(air_yards)) %>%
  group_by(
    result = if_else(complete_pass == 1, "Complete", "Incomplete")
  ) %>%
  summarise(avg_air_yards = mean(air_yards))
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])


def _deep_share(air_yards):
    """Percentage of values in `air_yards` that are 20 or more."""
    return (air_yards >= 20).mean() * 100


# QB air yards analysis: keep plays that have both a passer id and air yards
has_passer = pbp["passer_player_id"].notna()
has_air_yards = pbp["air_yards"].notna()
passes = pbp[has_passer & has_air_yards]

per_qb = passes.groupby(["passer_player_id", "passer_player_name"])
qb_air_yards = per_qb.agg(
    attempts=("air_yards", "count"),
    total_air_yards=("air_yards", "sum"),
    avg_air_yards=("air_yards", "mean"),
    deep_pct=("air_yards", _deep_share),
).reset_index()

# Keep passers with at least 200 attempts, deepest average first
qualified = qb_air_yards["attempts"] >= 200
qb_air_yards = qb_air_yards[qualified].sort_values(
    "avg_air_yards", ascending=False
)

print("QB Air Yards Analysis:")
print(qb_air_yards)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Completion percentage over expected (CPOE) per quarterback.
# Only plays with both a passer id and a cpoe value are used.
qb_cpoe <- pbp %>%
  filter(!is.na(passer_player_id) & !is.na(cpoe)) %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarise(
    attempts = n(),
    completion_pct = 100 * mean(complete_pass),
    exp_completion_pct = 100 * mean(cp),
    # Defined last so the expressions above still see the play-level column
    cpoe = mean(cpoe),
    .groups = "drop"
  ) %>%
  filter(attempts >= 200) %>%
  arrange(desc(cpoe))

print(qb_cpoe)

# CPOE by depth of target.
# air_yards is non-missing after the filter, so the buckets can be
# tested from deepest to shallowest with the same result.
pbp %>%
  filter(!is.na(cpoe) & !is.na(air_yards)) %>%
  mutate(
    depth = case_when(
      air_yards >= 20 ~ "Deep (20+)",
      air_yards >= 10 ~ "Medium (10-19)",
      air_yards >= 0 ~ "Short (0-9)",
      TRUE ~ "Behind LOS"
    )
  ) %>%
  group_by(depth) %>%
  summarise(
    attempts = n(),
    avg_cpoe = mean(cpoe),
    completion_rate = mean(complete_pass)
  )
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])


def _as_percent(values):
    """Mean of `values`, scaled to a percentage."""
    return values.mean() * 100


# CPOE analysis: keep plays that have both a passer id and a cpoe value
usable = pbp["passer_player_id"].notna() & pbp["cpoe"].notna()
passes = pbp[usable]

per_qb = passes.groupby(["passer_player_id", "passer_player_name"])
qb_cpoe = per_qb.agg(
    attempts=("cpoe", "count"),
    completion_pct=("complete_pass", _as_percent),
    cpoe=("cpoe", "mean"),
).reset_index()

# Keep passers with at least 200 attempts, best CPOE first
qualified = qb_cpoe["attempts"] >= 200
qb_cpoe = qb_cpoe[qualified].sort_values("cpoe", ascending=False)

print("QB CPOE Rankings:")
print(qb_cpoe)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Passing efficiency by depth zone.
# air_yards is non-missing after the filter, so the zones can be tested
# from deepest to shallowest with the same result.
depth_analysis <- pbp %>%
  filter(!is.na(air_yards) & play_type == "pass") %>%
  mutate(
    depth_zone = case_when(
      air_yards >= 20 ~ "Bomb (20+)",
      air_yards >= 15 ~ "Deep (15-19)",
      air_yards >= 10 ~ "Medium (10-14)",
      air_yards >= 5 ~ "Intermediate (5-9)",
      air_yards >= 0 ~ "Short (0-4)",
      TRUE ~ "Behind LOS"
    )
  ) %>%
  group_by(depth_zone) %>%
  summarise(
    attempts = n(),
    completions = sum(complete_pass),
    comp_pct = 100 * mean(complete_pass),
    avg_epa = mean(epa, na.rm = TRUE),
    td_rate = 100 * mean(pass_touchdown, na.rm = TRUE),
    int_rate = 100 * mean(interception),
    .groups = "drop"
  )

print(depth_analysis)

# Team deep passing rankings (20+ air yards, plays with a known EPA)
pbp %>%
  filter(air_yards >= 20 & !is.na(epa)) %>%
  group_by(posteam) %>%
  summarise(
    deep_attempts = n(),
    deep_comp_pct = 100 * mean(complete_pass),
    deep_epa = mean(epa)
  ) %>%
  arrange(desc(deep_epa))
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# Filter pass plays with air yards.
# .copy() makes `passes` an independent frame so that adding the
# depth_zone column below does not raise SettingWithCopyWarning.
passes = pbp[(pbp["air_yards"].notna()) & (pbp["play_type"] == "pass")].copy()


# Depth zones
def get_depth_zone(ay):
    """Return the depth-zone label for a single air-yards value `ay`."""
    if ay < 0:
        return "Behind LOS"
    elif ay < 5:
        return "Short (0-4)"
    elif ay < 10:
        return "Intermediate (5-9)"
    elif ay < 15:
        return "Medium (10-14)"
    elif ay < 20:
        return "Deep (15-19)"
    else:
        return "Bomb (20+)"


passes["depth_zone"] = passes["air_yards"].apply(get_depth_zone)

# Attempts, completion %, mean EPA and touchdown % per depth zone
depth_analysis = (
    passes.groupby("depth_zone")
    .agg(
        attempts=("air_yards", "count"),
        comp_pct=("complete_pass", lambda x: x.mean() * 100),
        avg_epa=("epa", "mean"),
        td_rate=("pass_touchdown", lambda x: x.mean() * 100),
    )
    .reset_index()
)

print("Passing Efficiency by Depth:")
print(depth_analysis)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Sack-based pressure profile per quarterback (pass plays with a passer id)
qb_pressure <- pbp %>%
  filter(!is.na(passer_player_id) & play_type == "pass") %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarise(
    dropbacks = n(),
    sacks = sum(sack),
    sack_rate = 100 * mean(sack),
    # Mean EPA over the plays that did not end in a sack
    epa_no_sack = mean(epa[sack == 0], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(dropbacks >= 200) %>%
  arrange(sack_rate)

print(qb_pressure)

# EPA with vs without pressure (using sack as proxy)
pbp %>%
  filter(play_type == "pass" & !is.na(epa)) %>%
  mutate(sack = as.logical(sack)) %>%
  group_by(sack) %>%
  summarise(
    plays = n(),
    avg_epa = mean(epa)
  )
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])


def _as_percent(values):
    """Mean of `values`, scaled to a percentage."""
    return values.mean() * 100


# QB pressure analysis: pass plays that have a passer id
is_pass = pbp["play_type"] == "pass"
passes = pbp[pbp["passer_player_id"].notna() & is_pass]

per_qb = passes.groupby(["passer_player_id", "passer_player_name"])
qb_pressure = per_qb.agg(
    dropbacks=("sack", "count"),
    sacks=("sack", "sum"),
    sack_rate=("sack", _as_percent),
).reset_index()

# Keep passers with at least 200 dropbacks, lowest sack rate first
qualified = qb_pressure["dropbacks"] >= 200
qb_pressure = qb_pressure[qualified].sort_values("sack_rate")

print("QB Sack Rates (lowest is best):")
print(qb_pressure)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Create pass location data: pass plays with known air yards and location.
# x_loc encodes the location as -1 / 0 / 1; any other pass_location value
# would get NA because case_when() has no default branch.
pass_locations <- pbp %>%
  filter(
    play_type == "pass",
    !is.na(air_yards),
    !is.na(pass_location)
  ) %>%
  mutate(
    x_loc = case_when(
      pass_location == "left" ~ -1,
      pass_location == "middle" ~ 0,
      pass_location == "right" ~ 1
    )
  )

# Team passing tendencies: attempts and mean EPA per team and location,
# plus each location's share of that team's attempts.
team_tendencies <- pass_locations %>%
  group_by(posteam, pass_location) %>%
  summarize(
    attempts = n(),
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(posteam) %>%
  mutate(pct = attempts / sum(attempts) * 100) %>%
  # Drop the grouping so later verbs on team_tendencies are not
  # silently evaluated per team.
  ungroup()

# Visualize (sample for one team)
team_tendencies %>%
  filter(posteam == "KC") %>%
  ggplot(aes(x = pass_location, y = pct, fill = epa)) +
  geom_col() +
  scale_fill_gradient2(
    low = "red", mid = "white", high = "green", midpoint = 0
  ) +
  labs(title = "KC Pass Location Tendencies", y = "% of Passes") +
  theme_minimal()
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# Pass location analysis: pass plays with known air yards and location
is_pass = pbp["play_type"] == "pass"
has_air_yards = pbp["air_yards"].notna()
has_location = pbp["pass_location"].notna()
passes = pbp[is_pass & has_air_yards & has_location]

# Team passing tendencies: attempts and mean EPA per team and location
by_team_location = passes.groupby(["posteam", "pass_location"])
team_tendencies = by_team_location.agg(
    attempts=("epa", "count"),
    epa=("epa", "mean"),
).reset_index()

# Calculate percentages: each location's share of the team's attempts
team_totals = team_tendencies.groupby("posteam")["attempts"].transform("sum")
team_tendencies["pct"] = team_tendencies["attempts"] / team_totals * 100

print("Team Pass Location Tendencies:")
kc_rows = team_tendencies["posteam"] == "KC"
print(team_tendencies[kc_rows])
nflfastR
tidyverse
ggplot2
nfl_data_py
pandas
matplotlib
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Dropback success rate (includes sacks and scrambles).
# passer_player_id is only set when a pass is attributed to the player, so
# filtering on it drops scrambles and forces scramble_rate to ~0.
# nflfastR's passer_id / passer columns identify the dropback player on
# scrambles as well; they are renamed in group_by() so the output keeps the
# original passer_player_id / passer_player_name column names.
qb_dropback <- pbp %>%
  filter(!is.na(passer_id), qb_dropback == 1) %>%
  group_by(passer_player_id = passer_id, passer_player_name = passer) %>%
  summarize(
    dropbacks = n(),
    success_rate = mean(success, na.rm = TRUE) * 100,
    epa_per_dropback = mean(epa, na.rm = TRUE),
    sack_rate = mean(sack, na.rm = TRUE) * 100,
    scramble_rate = mean(qb_scramble, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(dropbacks >= 200) %>%
  arrange(desc(success_rate))

print(qb_dropback)
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# Dropback success rate.
# passer_player_id is only set when a pass is attributed to the player, so
# filtering on it drops scrambles. The nflverse passer_id / passer columns
# identify the dropback player on scrambles as well.
dropbacks = pbp[(pbp["passer_id"].notna()) & (pbp["qb_dropback"] == 1)]

qb_dropback = (
    dropbacks.groupby(["passer_id", "passer"])
    .agg(
        # Every kept row has qb_dropback == 1, so this counts rows rather
        # than only the rows with a non-null `success` value.
        dropbacks=("qb_dropback", "count"),
        success_rate=("success", lambda x: x.mean() * 100),
        epa_per_dropback=("epa", "mean"),
        sack_rate=("sack", lambda x: x.mean() * 100),
        scramble_rate=("qb_scramble", lambda x: x.mean() * 100),
    )
    .reset_index()
    # Keep the original output column names
    .rename(columns={
        "passer_id": "passer_player_id",
        "passer": "passer_player_name",
    })
)

# Keep passers with at least 200 dropbacks, best success rate first
qb_dropback = qb_dropback[qb_dropback["dropbacks"] >= 200].sort_values(
    "success_rate", ascending=False
)

print("QB Dropback Success Rate:")
print(qb_dropback)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Play action analysis: pass plays with a known EPA.
# NOTE(review): this assumes pbp carries a logical `play_action` column --
# confirm it exists in the loaded data before relying on this snippet.
# Missing flags are treated as "not play action".
play_action <- pbp %>%
  filter(play_type == "pass" & !is.na(epa)) %>%
  mutate(play_action = if_else(!is.na(play_action), play_action, FALSE))

# Overall comparison
play_action %>%
  group_by(play_action) %>%
  summarise(
    attempts = n(),
    avg_epa = mean(epa),
    success_rate = 100 * mean(success),
    avg_air_yards = mean(air_yards, na.rm = TRUE)
  )

# Team play action usage and effectiveness
team_pa <- play_action %>%
  group_by(posteam) %>%
  summarise(
    total_passes = n(),
    pa_rate = 100 * mean(play_action),
    pa_epa = mean(epa[play_action]),
    no_pa_epa = mean(epa[!play_action]),
    # EPA gained per play by using play action
    pa_advantage = pa_epa - no_pa_epa
  ) %>%
  arrange(desc(pa_advantage))

print(team_pa)
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# Play action analysis: pass plays with a known EPA.
# NOTE(review): this assumes pbp carries a `play_action` column -- confirm
# it exists in the loaded data before relying on this snippet.
passes = pbp[(pbp["play_type"] == "pass") & (pbp["epa"].notna())].copy()

# Missing flags count as "not play action". The explicit cast guarantees a
# real boolean column: on an object or float column, `~` and mask indexing
# below would not behave as a logical NOT / row filter.
passes["play_action"] = passes["play_action"].fillna(False).astype(bool)

# Overall comparison
pa_comparison = (
    passes.groupby("play_action")
    .agg(
        attempts=("epa", "count"),
        avg_epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100),
    )
    .reset_index()
)

print("Play Action vs Standard Passing:")
print(pa_comparison)

# Team play action effectiveness
team_pa = (
    passes.groupby("posteam")
    .apply(lambda x: pd.Series({
        "pa_rate": x["play_action"].mean() * 100,
        "pa_epa": x.loc[x["play_action"], "epa"].mean(),
        "no_pa_epa": x.loc[~x["play_action"], "epa"].mean(),
    }))
    .reset_index()
)
# EPA gained per play by using play action
team_pa["pa_advantage"] = team_pa["pa_epa"] - team_pa["no_pa_epa"]

print("\nTeam Play Action Effectiveness:")
print(team_pa.sort_values("pa_advantage", ascending=False))
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# QB scramble analysis.
# passer_player_id is only set when a pass is attributed to the player, so
# filtering on it removes the scramble plays themselves: scrambles would be
# ~0 and the `scrambles >= 10` filter would empty the table. nflfastR's
# passer_id / passer columns identify the dropback player on scrambles as
# well; they are renamed in group_by() so the output keeps the original
# passer_player_id / passer_player_name column names.
qb_scrambles <- pbp %>%
  filter(!is.na(passer_id), qb_dropback == 1) %>%
  group_by(passer_player_id = passer_id, passer_player_name = passer) %>%
  summarize(
    dropbacks = sum(qb_dropback, na.rm = TRUE),
    scrambles = sum(qb_scramble, na.rm = TRUE),
    scramble_rate = mean(qb_scramble, na.rm = TRUE) * 100,
    scramble_yards = sum(yards_gained[qb_scramble == 1], na.rm = TRUE),
    scramble_epa = mean(epa[qb_scramble == 1], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(dropbacks >= 200, scrambles >= 10) %>%
  arrange(desc(scramble_rate))

print(qb_scrambles)

# Scramble success by situation.
# Plays matching none of the listed conditions (including a missing down)
# fall into "Normal". na.rm = TRUE keeps one play with a missing value
# from turning a whole situation's average into NA.
pbp %>%
  filter(qb_scramble == 1) %>%
  mutate(
    situation = case_when(
      down <= 2 & ydstogo <= 5 ~ "Short yardage",
      down == 3 ~ "Third down",
      down == 4 ~ "Fourth down",
      TRUE ~ "Normal"
    )
  ) %>%
  group_by(situation) %>%
  summarize(
    scrambles = n(),
    avg_yards = mean(yards_gained, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE)
  )
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# QB scramble analysis.
# passer_player_id is only set when a pass is attributed to the player, so
# filtering on it removes the scramble plays themselves and leaves every
# quarterback with ~0 scrambles. The nflverse passer_id / passer columns
# identify the dropback player on scrambles as well.
qb_plays = pbp[(pbp["passer_id"].notna()) & (pbp["qb_dropback"] == 1)]

qb_scrambles = (
    qb_plays.groupby(["passer_id", "passer"])
    .agg(
        dropbacks=("qb_dropback", "sum"),
        scrambles=("qb_scramble", "sum"),
        scramble_rate=("qb_scramble", lambda x: x.mean() * 100),
    )
    .reset_index()
    # Keep the original output column names
    .rename(columns={
        "passer_id": "passer_player_id",
        "passer": "passer_player_name",
    })
)

# Keep QBs with at least 200 dropbacks and 10 scrambles, highest rate first
qb_scrambles = qb_scrambles[
    (qb_scrambles["dropbacks"] >= 200) &
    (qb_scrambles["scrambles"] >= 10)
].sort_values("scramble_rate", ascending=False)

print("QB Scramble Rates:")
print(qb_scrambles)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Receiver production on pass plays with a receiver id, one row per
# receiver and team, ordered by targets. Target share is each receiver's
# percentage of the targets summed over that team's rows.
receiver_targets <- pbp %>%
  filter(!is.na(receiver_player_id) & play_type == "pass") %>%
  group_by(receiver_player_id, receiver_player_name, posteam) %>%
  summarise(
    targets = n(),
    receptions = sum(complete_pass),
    yards = sum(yards_gained, na.rm = TRUE),
    tds = sum(pass_touchdown),
    epa = sum(epa, na.rm = TRUE),
    avg_depth = mean(air_yards, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(targets)) %>%
  # Add target share
  group_by(posteam) %>%
  mutate(
    team_targets = sum(targets),
    target_share = 100 * targets / team_targets
  ) %>%
  ungroup()

print(head(receiver_targets, 30))
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# Receiver target analysis: pass plays that have a receiver id
has_receiver = pbp["receiver_player_id"].notna()
is_pass = pbp["play_type"] == "pass"
passes = pbp[has_receiver & is_pass]

receiver_keys = ["receiver_player_id", "receiver_player_name", "posteam"]
receiver_targets = passes.groupby(receiver_keys).agg(
    # Counts the plays with a non-null EPA in each group
    targets=("epa", "count"),
    receptions=("complete_pass", "sum"),
    yards=("yards_gained", "sum"),
    tds=("pass_touchdown", "sum"),
    epa=("epa", "sum"),
    avg_depth=("air_yards", "mean"),
)
receiver_targets = receiver_targets.reset_index()
receiver_targets = receiver_targets.sort_values("targets", ascending=False)

# Add target share: percentage of the targets summed over the team's rows
team_targets = receiver_targets.groupby("posteam")["targets"].transform("sum")
receiver_targets["target_share"] = receiver_targets["targets"] / team_targets * 100

print("Top Receivers by Targets:")
print(receiver_targets.head(30))
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)

# Play-by-play data for the 2023 season
pbp <- load_pbp(2023)

# Receiver YAC analysis.
# The table is built from every pass play aimed at the receiver, not only
# completions, so that yac_per_target really divides by targets. Dividing
# by n() of completed passes made it identical to avg_yac.
# receptions / total_yac / avg_yac still describe completed passes with a
# recorded yards_after_catch value; `targets` is an added column.
receiver_yac <- pbp %>%
  filter(play_type == "pass", !is.na(receiver_player_id)) %>%
  group_by(receiver_player_id, receiver_player_name) %>%
  summarize(
    receptions = sum(
      complete_pass == 1 & !is.na(yards_after_catch),
      na.rm = TRUE
    ),
    total_yac = sum(yards_after_catch[complete_pass == 1], na.rm = TRUE),
    avg_yac = total_yac / receptions,
    targets = n(),
    yac_per_target = total_yac / targets,
    .groups = "drop"
  ) %>%
  filter(receptions >= 40) %>%
  arrange(desc(avg_yac))

print(receiver_yac)

# Team YAC vs Air Yards balance (completed passes with recorded YAC).
# yac_pct is the YAC share of average air yards plus average YAC.
team_yac <- pbp %>%
  filter(complete_pass == 1, !is.na(yards_after_catch)) %>%
  group_by(posteam) %>%
  summarize(
    completions = n(),
    avg_air_yards = mean(air_yards, na.rm = TRUE),
    avg_yac = mean(yards_after_catch),
    yac_pct = avg_yac / (avg_air_yards + avg_yac) * 100
  ) %>%
  arrange(desc(avg_yac))

print(team_yac)
import nfl_data_py as nfl
import pandas as pd

# Play-by-play data for the 2023 season
pbp = nfl.import_pbp_data([2023])

# Receiver YAC analysis: completed passes with a recorded yards_after_catch
is_complete = pbp["complete_pass"] == 1
has_yac = pbp["yards_after_catch"].notna()
completions = pbp[is_complete & has_yac]

per_receiver = completions.groupby(["receiver_player_id", "receiver_player_name"])
receiver_yac = per_receiver.agg(
    receptions=("yards_after_catch", "count"),
    total_yac=("yards_after_catch", "sum"),
    avg_yac=("yards_after_catch", "mean"),
).reset_index()

# Keep receivers with at least 40 receptions, best average YAC first
qualified = receiver_yac["receptions"] >= 40
receiver_yac = receiver_yac[qualified].sort_values("avg_yac", ascending=False)

print("Top Receivers by YAC:")
print(receiver_yac.head(20))

# Team YAC analysis: average air yards and average YAC per team
per_team = completions.groupby("posteam")
team_yac = per_team.agg(
    avg_air_yards=("air_yards", "mean"),
    avg_yac=("yards_after_catch", "mean"),
)
team_yac = team_yac.reset_index().sort_values("avg_yac", ascending=False)

print("\nTeam YAC Rankings:")
print(team_yac)
nflfastR
tidyverse
nfl_data_py
pandas
nflfastR - Play-by-play data with EPA
nflplotR - NFL team logos & plotting
tidyverse - Data manipulation & visualization
ggplot2 - Advanced visualizations
nfl_data_py - NFL data (nflverse compatible)
pandas - Data manipulation
matplotlib - Visualizations
scikit-learn - Machine learning
Learn the theory behind these techniques in our comprehensive tutorial series.
Browse Tutorials