Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.
Analyze play-level data including formations, personnel, and sequencing
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Play type distribution: how often each play type occurs, as a count and as
# a percentage of all plays that have a play type recorded.
play_types <- pbp %>%
  filter(!is.na(play_type)) %>%
  count(play_type, sort = TRUE) %>%
  mutate(pct = n / sum(n) * 100)

# cat() writes the header as plain text. print() on a string shows it as a
# quoted vector element and displays "\n" as a literal escape sequence.
cat("Play Type Distribution:\n")
print(play_types)

# Play type by down: share of pass plays among pass/run plays on each down.
by_down <- pbp %>%
  filter(play_type %in% c("pass", "run"), !is.na(down)) %>%
  group_by(down) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    .groups = "drop"
  )

cat("\nPass Rate by Down:\n")
print(by_down)
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Play type distribution: row count per play type, largest first, plus the
# share of all typed plays.
has_play_type = pbp["play_type"].notna()
play_types = (
    pbp.loc[has_play_type]
    .groupby("play_type")
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)
total_typed = play_types["count"].sum()
play_types["pct"] = play_types["count"] / total_typed * 100

print("Play Type Distribution:")
print(play_types)

# Play type by down: pass share among pass/run plays with a known down.
is_scrimmage = pbp["play_type"].isin(["pass", "run"])
has_down = pbp["down"].notna()
by_down = (
    pbp.loc[is_scrimmage & has_down]
    .groupby("down")
    .agg(
        plays=("play_type", "count"),
        pass_rate=("play_type", lambda s: (s == "pass").mean() * 100),
    )
    .reset_index()
)

print("\nPass Rate by Down:")
print(by_down)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)

# offense_formation is published in the nflverse participation data rather
# than the core play-by-play columns. Attach it when pbp does not already
# carry it; nflreadr is installed as a dependency of nflfastR.
if (!"offense_formation" %in% names(pbp)) {
  participation <- nflreadr::load_participation(2023) %>%
    select(game_id = nflverse_game_id, play_id, offense_formation)
  pbp <- pbp %>%
    left_join(participation, by = c("game_id", "play_id"))
}

# Formation usage: volume, pass rate, mean EPA and success rate for each
# formation, restricted to formations with at least 100 pass/run plays.
formation_usage <- pbp %>%
  filter(!is.na(offense_formation), play_type %in% c("pass", "run")) %>%
  group_by(offense_formation) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(plays >= 100) %>%
  arrange(desc(plays))

print(formation_usage)

# Formation by team: each formation's share of the team's pass/run plays.
team_formations <- pbp %>%
  filter(!is.na(offense_formation), play_type %in% c("pass", "run")) %>%
  count(posteam, offense_formation, name = "plays") %>%
  group_by(posteam) %>%
  mutate(pct = plays / sum(plays) * 100) %>%
  # Drop the grouping so later verbs on team_formations are not per-team.
  ungroup() %>%
  arrange(posteam, desc(pct))
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Formation usage: volume, pass rate, mean EPA and success rate per offensive
# formation, restricted to formations with at least 100 pass/run plays.
is_scrimmage = pbp["play_type"].isin(["pass", "run"])
plays = pbp[pbp["offense_formation"].notna() & is_scrimmage]

formation_usage = (
    plays.groupby("offense_formation")
    .agg(
        # "size" counts rows. Counting the epa column would silently drop
        # plays whose EPA is missing and undercount the formation.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100),
    )
    .reset_index()
)
formation_usage = formation_usage[formation_usage["plays"] >= 100].sort_values(
    "plays", ascending=False
)

print("Formation Usage:")
print(formation_usage)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)

# offense_personnel is published in the nflverse participation data rather
# than the core play-by-play columns. Attach it when pbp does not already
# carry it; nflreadr is installed as a dependency of nflfastR.
if (!"offense_personnel" %in% names(pbp)) {
  participation <- nflreadr::load_participation(2023) %>%
    select(game_id = nflverse_game_id, play_id, offense_personnel)
  pbp <- pbp %>%
    left_join(participation, by = c("game_id", "play_id"))
}

# Personnel grouping analysis: volume, pass rate and mean EPA for each
# grouping with at least 200 pass/run plays.
personnel <- pbp %>%
  filter(!is.na(offense_personnel), play_type %in% c("pass", "run")) %>%
  group_by(offense_personnel) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(plays >= 200) %>%
  arrange(desc(plays))

cat("Personnel Grouping Usage:\n")
print(personnel)

# Team personnel tendencies: each grouping's share of the team's pass/run
# plays, keeping groupings used on at least 5% of them.
team_personnel <- pbp %>%
  filter(!is.na(offense_personnel), play_type %in% c("pass", "run")) %>%
  count(posteam, offense_personnel, name = "plays") %>%
  group_by(posteam) %>%
  mutate(pct = plays / sum(plays) * 100) %>%
  ungroup() %>%
  filter(pct >= 5) %>%
  arrange(posteam, desc(pct))
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Personnel grouping analysis: volume, pass rate and mean EPA for each
# grouping with at least 200 pass/run plays.
is_scrimmage = pbp["play_type"].isin(["pass", "run"])
plays = pbp[pbp["offense_personnel"].notna() & is_scrimmage]

personnel = (
    plays.groupby("offense_personnel")
    .agg(
        # "size" counts rows. Counting the epa column would silently drop
        # plays whose EPA is missing.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        epa=("epa", "mean"),
    )
    .reset_index()
)
personnel = personnel[personnel["plays"] >= 200].sort_values("plays", ascending=False)

print("Personnel Grouping Usage:")
print(personnel)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Game script analysis: bucket every pass/run play by the offense's score
# margin, then summarise volume, pass rate and mean EPA per bucket.
game_script <- pbp %>%
  filter(
    play_type %in% c("pass", "run"),
    !is.na(score_differential)
  ) %>%
  mutate(
    # Conditions are checked top to bottom, so each one only sees plays the
    # earlier ones did not claim. The factor levels fix the display order
    # from largest deficit to largest lead.
    script = factor(
      case_when(
        score_differential >= 14 ~ "Up 14+",
        score_differential >= 7 ~ "Up 7-13",
        score_differential >= 1 ~ "Up 1-6",
        score_differential == 0 ~ "Tied",
        score_differential >= -6 ~ "Down 1-6",
        score_differential >= -13 ~ "Down 7-13",
        TRUE ~ "Down 14+"
      ),
      levels = c(
        "Down 14+", "Down 7-13", "Down 1-6",
        "Tied",
        "Up 1-6", "Up 7-13", "Up 14+"
      )
    )
  ) %>%
  group_by(script) %>%
  summarise(
    plays = n(),
    pass_rate = 100 * mean(play_type == "pass"),
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

print("Game Script Analysis:")
print(game_script)
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Game script analysis: keep pass/run plays that have a score differential.
# The copy lets a new column be added without touching pbp.
is_scrimmage = pbp["play_type"].isin(["pass", "run"])
has_margin = pbp["score_differential"].notna()
plays = pbp.loc[is_scrimmage & has_margin].copy()
def get_script(diff):
    """Label a score differential with its game-script bucket.

    Args:
        diff: Score margin from the offense's point of view (positive when
            leading).

    Returns:
        One of "Up 14+", "Up 7-13", "Up 1-6", "Tied", "Down 1-6",
        "Down 7-13" or "Down 14+", or None when diff is NaN.
    """
    # NaN fails every comparison below and would otherwise be reported as
    # "Down 14+"; NaN is the only value that is not equal to itself.
    if diff != diff:
        return None
    if diff >= 14:
        return "Up 14+"
    if diff >= 7:
        return "Up 7-13"
    if diff >= 1:
        return "Up 1-6"
    if diff == 0:
        return "Tied"
    if diff >= -6:
        return "Down 1-6"
    if diff >= -13:
        return "Down 7-13"
    return "Down 14+"
# Tag each play with its game-script bucket, then summarise per bucket.
plays["script"] = plays["score_differential"].apply(get_script)

game_script = (
    plays.groupby("script")
    .agg(
        # "size" counts rows. Counting the epa column would silently drop
        # plays whose EPA is missing.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        epa=("epa", "mean"),
    )
    .reset_index()
)

# Order properly: largest deficit first, largest lead last.
order = ["Down 14+", "Down 7-13", "Down 1-6", "Tied", "Up 1-6", "Up 7-13", "Up 14+"]
game_script["script"] = pd.Categorical(game_script["script"], categories=order, ordered=True)
game_script = game_script.sort_values("script")

print("Game Script Analysis:")
print(game_script)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)

# No-huddle analysis: compare pass rate, mean EPA and success rate between
# no-huddle and huddle snaps on pass/run plays.
no_huddle <- pbp %>%
  filter(play_type %in% c("pass", "run"), !is.na(no_huddle)) %>%
  mutate(no_huddle = if_else(no_huddle == 1, "No-Huddle", "Huddle")) %>%
  group_by(no_huddle) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  )

# cat() writes headers as plain text. print() on a string shows it as a
# quoted vector element and displays "\n" as a literal escape sequence.
cat("No-Huddle vs Huddle:\n")
print(no_huddle)

# Team no-huddle usage: number of no-huddle pass/run plays and their mean
# EPA per offense, most frequent users first.
team_no_huddle <- pbp %>%
  filter(play_type %in% c("pass", "run"), no_huddle == 1) %>%
  group_by(posteam) %>%
  summarize(
    no_huddle_plays = n(),
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(no_huddle_plays))

cat("\nTop No-Huddle Teams:\n")
print(head(team_no_huddle, 10))
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# No-huddle analysis: compare pass rate, mean EPA and success rate between
# no-huddle and huddle snaps on pass/run plays.
plays = pbp[(pbp["play_type"].isin(["pass", "run"])) & (pbp["no_huddle"].notna())].copy()
plays["huddle_type"] = plays["no_huddle"].eq(1).map({True: "No-Huddle", False: "Huddle"})

no_huddle = (
    plays.groupby("huddle_type")
    .agg(
        # "size" counts rows. Counting the epa column would silently drop
        # plays whose EPA is missing.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100),
    )
    .reset_index()
)

print("No-Huddle vs Huddle:")
print(no_huddle)

# Team usage: no-huddle play count and mean EPA per offense.
team_nh = (
    plays[plays["no_huddle"] == 1]
    .groupby("posteam")
    .agg(plays=("play_type", "size"), epa=("epa", "mean"))
    .reset_index()
    .sort_values("plays", ascending=False)
)

print("\nTop No-Huddle Teams:")
print(team_nh.head(10))
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)

# Shotgun analysis: usage share, pass rate, mean EPA and success rate for
# shotgun versus under-center snaps on pass/run plays.
shotgun_analysis <- pbp %>%
  filter(play_type %in% c("pass", "run"), !is.na(shotgun)) %>%
  mutate(formation = if_else(shotgun == 1, "Shotgun", "Under Center")) %>%
  group_by(formation) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  # Share of all plays, derived from the per-formation counts. This avoids
  # nrow(.), which only works through magrittr's dot and breaks under |>.
  mutate(pct = plays / sum(plays) * 100, .after = plays)

# cat() writes headers as plain text. print() on a string shows it as a
# quoted vector element and displays "\n" as a literal escape sequence.
cat("Shotgun vs Under Center:\n")
print(shotgun_analysis)

# By play type: play count and mean EPA for each formation / play type pair.
by_play_type <- pbp %>%
  filter(play_type %in% c("pass", "run"), !is.na(shotgun)) %>%
  mutate(formation = if_else(shotgun == 1, "Shotgun", "Under Center")) %>%
  group_by(formation, play_type) %>%
  summarize(
    plays = n(),
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

cat("\nBy Play Type:\n")
print(by_play_type)
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Shotgun analysis: usage share, pass rate, mean EPA and success rate for
# shotgun versus under-center snaps on pass/run plays.
plays = pbp[(pbp["play_type"].isin(["pass", "run"])) & (pbp["shotgun"].notna())].copy()
plays["formation"] = plays["shotgun"].eq(1).map({True: "Shotgun", False: "Under Center"})

shotgun_analysis = (
    plays.groupby("formation")
    .agg(
        # "size" counts rows. Counting the epa column would silently drop
        # plays whose EPA is missing and skew the pct column below.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100),
    )
    .reset_index()
)
shotgun_analysis["pct"] = shotgun_analysis["plays"] / shotgun_analysis["plays"].sum() * 100

print("Shotgun vs Under Center:")
print(shotgun_analysis)

# By play type: play count and mean EPA per formation / play type pair.
by_type = (
    plays.groupby(["formation", "play_type"])
    .agg(plays=("play_type", "size"), epa=("epa", "mean"))
    .reset_index()
)

print("\nBy Play Type:")
print(by_type)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)

# is_motion is published in the FTN charting data rather than the core
# play-by-play columns. Attach it when pbp does not already carry it;
# nflreadr is installed as a dependency of nflfastR.
if (!"is_motion" %in% names(pbp)) {
  ftn <- nflreadr::load_ftn_charting(2023) %>%
    select(game_id = nflverse_game_id, play_id = nflverse_play_id, is_motion)
  pbp <- pbp %>%
    left_join(ftn, by = c("game_id", "play_id"))
}

# Motion analysis: pass rate, mean EPA and success rate with and without
# pre-snap motion, on pass/run plays where motion was charted.
motion_analysis <- pbp %>%
  filter(play_type %in% c("pass", "run"), !is.na(is_motion)) %>%
  mutate(motion = if_else(is_motion == 1, "Motion", "No Motion")) %>%
  group_by(motion) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  )

# cat() writes headers as plain text. print() on a string shows it as a
# quoted vector element and displays "\n" as a literal escape sequence.
cat("Motion Impact on Play Success:\n")
print(motion_analysis)

# Team motion usage. Plays with no motion charting are dropped up front, as
# in motion_analysis, so total_plays, motion_plays, motion_rate and both EPA
# columns all describe the same set of plays.
team_motion <- pbp %>%
  filter(play_type %in% c("pass", "run"), !is.na(is_motion)) %>%
  group_by(posteam) %>%
  summarize(
    total_plays = n(),
    motion_plays = sum(is_motion == 1),
    motion_rate = mean(is_motion == 1) * 100,
    motion_epa = mean(epa[is_motion == 1], na.rm = TRUE),
    no_motion_epa = mean(epa[is_motion != 1], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(motion_advantage = motion_epa - no_motion_epa) %>%
  arrange(desc(motion_rate))

cat("\nTeam Motion Usage:\n")
print(head(team_motion, 10))
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Motion analysis
# is_motion is published in the FTN charting data rather than the core
# play-by-play columns, so merge it in when pbp does not already carry it.
if "is_motion" not in pbp.columns:
    ftn = nfl.import_ftn_data([2023])[["nflverse_game_id", "nflverse_play_id", "is_motion"]]
    pbp = pbp.merge(
        ftn,
        how="left",
        left_on=["game_id", "play_id"],
        right_on=["nflverse_game_id", "nflverse_play_id"],
    )

plays = pbp[pbp["play_type"].isin(["pass", "run"])].copy()
motion = plays[plays["is_motion"] == 1]
# Plays without a motion flag are grouped with the no-motion plays.
no_motion = plays[(plays["is_motion"] != 1) | (plays["is_motion"].isna())]

print("Motion Impact on Play Success:")
# Single quotes inside the braces: backslash-escaped quotes in an f-string
# expression are a SyntaxError before Python 3.12.
print(f"With Motion: {len(motion)} plays, EPA: {motion['epa'].mean():.3f}")
print(f"No Motion: {len(no_motion)} plays, EPA: {no_motion['epa'].mean():.3f}")
# Team motion usage
def calc_motion_stats(group):
    """Summarise pre-snap motion usage for one group of plays.

    Args:
        group: DataFrame of plays with "is_motion" and "epa" columns.

    Returns:
        pd.Series with total_plays, motion_plays, motion_rate (percent of
        the group's plays flagged as motion) and motion_epa (mean EPA on
        motion plays, NaN when the group has none).
    """
    motion_plays = group[group["is_motion"] == 1]
    n_total = len(group)
    n_motion = len(motion_plays)
    return pd.Series({
        "total_plays": n_total,
        "motion_plays": n_motion,
        "motion_rate": n_motion / n_total * 100 if n_total > 0 else 0,
        # NaN rather than 0 when there are no motion plays: 0 is a real EPA
        # value and would read as "motion plays were exactly neutral".
        "motion_epa": motion_plays["epa"].mean() if n_motion > 0 else float("nan"),
    })
# Per-offense motion summary, heaviest motion users first.
team_motion = (
    plays.groupby("posteam")
    .apply(calc_motion_stats)
    .reset_index()
    .sort_values("motion_rate", ascending=False)
)

print("\nTeam Motion Usage:")
print(team_motion.head(10))
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)

# Add previous play info: for every pass/run play, attach the type, success
# flag and EPA of the same offense's preceding pass/run play in that game.
play_sequence <- pbp %>%
  filter(play_type %in% c("pass", "run")) %>%
  # arrange() ignores grouping, so sort explicitly by game and play before
  # grouping; lag() then walks each offense's plays in order.
  arrange(game_id, play_id) %>%
  group_by(game_id, posteam) %>%
  mutate(
    prev_play = lag(play_type),
    prev_success = lag(success),
    prev_epa = lag(epa)
  ) %>%
  ungroup() %>%
  filter(!is.na(prev_play))

# Play calling after success/failure. Plays whose previous play has no
# success flag are dropped so they do not form a separate NA group.
after_result <- play_sequence %>%
  filter(!is.na(prev_success)) %>%
  mutate(
    prev_result = if_else(prev_success == 1, "After Success", "After Failure")
  ) %>%
  group_by(prev_result) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

# cat() writes headers as plain text. print() on a string shows it as a
# quoted vector element and displays "\n" as a literal escape sequence.
cat("Play Calling After Success/Failure:\n")
print(after_result)

# Run-Run, Run-Pass, Pass-Run, Pass-Pass sequences
sequences <- play_sequence %>%
  mutate(sequence = paste(prev_play, play_type, sep = " -> ")) %>%
  group_by(sequence) %>%
  summarize(
    plays = n(),
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(plays))

cat("\nPlay Sequences:\n")
print(sequences)
import nfl_data_py as nfl
import pandas as pd
pbp = nfl.import_pbp_data([2023])
# Filter and sort
plays = pbp[pbp["play_type"].isin(["pass", "run"])].copy()
plays = plays.sort_values(["game_id", "play_id"])

# Add previous play info: the same offense's preceding pass/run play in the
# same game.
by_offense = plays.groupby(["game_id", "posteam"])
plays["prev_play"] = by_offense["play_type"].shift(1)
plays["prev_success"] = by_offense["success"].shift(1)
# Copy the filtered frame so the column assignments below write to an
# independent frame rather than a slice of the previous one.
plays = plays[plays["prev_play"].notna()].copy()

# After success/failure. A missing prev_success stays missing instead of
# being labelled "After Failure"; groupby then leaves those plays out.
prev_known = plays["prev_success"].notna()
plays["prev_result"] = (
    plays["prev_success"]
    .eq(1)
    .map({True: "After Success", False: "After Failure"})
    .where(prev_known)
)

after_result = (
    plays.groupby("prev_result")
    .agg(
        # "size" counts rows. Counting the epa column would silently drop
        # plays whose EPA is missing.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        epa=("epa", "mean"),
    )
    .reset_index()
)

print("Play Calling After Success/Failure:")
print(after_result)

# Sequences: previous play type followed by current play type.
plays["sequence"] = plays["prev_play"] + " -> " + plays["play_type"]
sequences = (
    plays.groupby("sequence")
    .agg(
        plays=("play_type", "size"),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100),
    )
    .reset_index()
    .sort_values("plays", ascending=False)
)

print("\nPlay Sequences:")
print(sequences)
nflfastR
tidyverse
nfl_data_py
pandas
nflfastR - Play-by-play data with EPA
nflplotR - NFL team logos & plotting
tidyverse - Data manipulation & visualization
ggplot2 - Advanced visualizations
nfl_data_py - NFL data (nflverse compatible)
pandas - Data manipulation
matplotlib - Visualizations
scikit-learn - Machine learning
Learn the theory behind these techniques in our comprehensive tutorial series.
Browse Tutorials