Copy-paste-ready R and Python code for NFL analytics, from data loading to machine learning models.
Running back and rushing game analytics
library(nflfastR)
library(tidyverse)
# Load the 2023 play-by-play data used by the analyses below.
pbp <- load_pbp(2023)

# Team rushing analysis
# One row per offense: attempt count, mean yards, mean EPA and success
# rate (as a percentage) over run plays that have an EPA value.
team_rushing <- pbp %>%
  filter(play_type == "run") %>%
  filter(!is.na(epa)) %>%
  group_by(posteam) %>%
  summarise(
    rush_attempts = n(),
    avg_yards = mean(yards_gained),
    rush_epa = mean(epa),
    success_rate = 100 * mean(success),
    .groups = "drop"
  ) %>%
  arrange(desc(rush_epa))  # best EPA per rush first

print(team_rushing)
# RB efficiency
# Per-rusher totals and rates over run plays with an identified rusher,
# restricted to players with at least 100 attempts.
#
# Plays with a missing EPA are not filtered out here, so every aggregate
# skips missing values explicitly. Without na.rm a single NA would turn
# that player's whole yards / success-rate figure into NA.
rb_efficiency <- pbp %>%
  filter(!is.na(rusher_player_id), play_type == "run") %>%
  group_by(rusher_player_id, rusher_player_name) %>%
  summarize(
    attempts = n(),
    yards = sum(yards_gained, na.rm = TRUE),
    ypc = mean(yards_gained, na.rm = TRUE),
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100) %>%
  arrange(desc(epa))

print(rb_efficiency)
import nfl_data_py as nfl
import pandas as pd
# Load the 2023 play-by-play data.
pbp = nfl.import_pbp_data([2023])

# Team rushing analysis
# Keep run plays that carry an EPA value, then aggregate per offense.
rushes = pbp.loc[pbp["play_type"].eq("run") & pbp["epa"].notna()]

team_rushing = rushes.groupby("posteam").agg(
    rush_attempts=pd.NamedAgg(column="epa", aggfunc="count"),
    avg_yards=pd.NamedAgg(column="yards_gained", aggfunc="mean"),
    rush_epa=pd.NamedAgg(column="epa", aggfunc="mean"),
    # success rate expressed as a percentage
    success_rate=pd.NamedAgg(column="success", aggfunc=lambda s: s.mean() * 100),
)
team_rushing = team_rushing.reset_index()
team_rushing = team_rushing.sort_values(by="rush_epa", ascending=False)

print("Team Rushing Rankings:")
print(team_rushing)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Load the 2023 play-by-play data.
pbp <- load_pbp(2023)

# Run direction analysis
# Efficiency by (run_location, run_gap).
#
# nflverse only records run_gap (end / guard / tackle) for runs to the
# left or right; runs up the middle have run_gap = NA. Filtering on
# !is.na(run_gap) alone therefore discards every middle run, so middle
# runs get an explicit "middle" gap label before the NA filter.
run_direction <- pbp %>%
  filter(play_type == "run", !is.na(run_location)) %>%
  mutate(run_gap = if_else(run_location == "middle", "middle", run_gap)) %>%
  filter(!is.na(run_gap)) %>%
  group_by(run_location, run_gap) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained, na.rm = TRUE),
    epa = mean(epa, na.rm = TRUE),
    # NA-EPA plays are not filtered, so skip missing success flags as well
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(epa))

print(run_direction)
# Team run direction tendencies
# For each offense: number of runs per location and the share (in %) of
# that team's located runs, spread to one row per team.
team_direction <- pbp %>%
  filter(play_type == "run", !is.na(run_location)) %>%
  group_by(posteam, run_location) %>%
  summarize(attempts = n(), .groups = "drop") %>%
  group_by(posteam) %>%
  mutate(pct = attempts / sum(attempts) * 100) %>%
  # drop the grouping so the result is a plain tibble for later steps
  ungroup() %>%
  pivot_wider(
    names_from = run_location,
    values_from = c(attempts, pct),
    # a team with no runs to a location has 0 attempts / 0 %, not NA
    values_fill = list(attempts = 0L, pct = 0)
  )

print(team_direction)
import nfl_data_py as nfl
import pandas as pd
# Load the 2023 play-by-play data.
pbp = nfl.import_pbp_data([2023])

# Run direction analysis
# nflverse only records run_gap (end / guard / tackle) for runs to the
# left or right; runs up the middle have no run_gap. Requiring a non-null
# run_gap alone would discard every middle run, so label them "middle".
rushes = pbp[(pbp["play_type"] == "run") &
             (pbp["run_location"].notna())].copy()
rushes["run_gap"] = (rushes["run_gap"]
                     .astype("object")
                     .where(rushes["run_location"] != "middle", "middle"))
rushes = rushes[rushes["run_gap"].notna()]

run_direction = (rushes.groupby(["run_location", "run_gap"])
    .agg(
        # count every play in the group; "count" on epa would skip plays
        # whose EPA is missing while the means below still use them
        attempts=("run_gap", "size"),
        avg_yards=("yards_gained", "mean"),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index()
    .sort_values("epa", ascending=False))

print("Run Efficiency by Direction:")
print(run_direction)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Load the 2023 play-by-play data.
pbp <- load_pbp(2023)

# defenders_in_box is a participation field, which nflverse publishes
# separately from the play-by-play file. Attach it when the loaded data
# does not already carry it, so the filters below do not fail with
# "object 'defenders_in_box' not found". nflreadr is installed as a
# dependency of nflfastR, hence the namespaced call.
if (!"defenders_in_box" %in% names(pbp)) {
  pbp <- pbp %>%
    left_join(
      nflreadr::load_participation(2023) %>%
        select(game_id = nflverse_game_id, play_id, defenders_in_box),
      by = c("game_id", "play_id")
    )
}

# Box count analysis
# Rushing efficiency by number of defenders in the box, keeping only box
# counts seen on at least 100 run plays.
box_analysis <- pbp %>%
  filter(play_type == "run", !is.na(defenders_in_box), !is.na(epa)) %>%
  group_by(defenders_in_box) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100)

print(box_analysis)
# Team success against stacked boxes (8+)
# Per offense: attempts, mean EPA and success rate (%) on run plays with
# eight or more defenders in the box, best EPA first.
stacked_box <- pbp %>%
  filter(play_type == "run") %>%
  filter(defenders_in_box >= 8) %>%
  filter(!is.na(epa)) %>%
  group_by(posteam) %>%
  summarise(
    stacked_attempts = n(),
    stacked_epa = mean(epa),
    stacked_success = 100 * mean(success)
  ) %>%
  arrange(desc(stacked_epa))

print(stacked_box)
import nfl_data_py as nfl
import pandas as pd
# Load the 2023 play-by-play data.
pbp = nfl.import_pbp_data([2023])

# Box count analysis
# Run plays with both a box count and an EPA value.
rushes = pbp.loc[
    pbp["play_type"].eq("run")
    & pbp["defenders_in_box"].notna()
    & pbp["epa"].notna()
]

box_analysis = rushes.groupby("defenders_in_box").agg(
    attempts=pd.NamedAgg(column="epa", aggfunc="count"),
    avg_yards=pd.NamedAgg(column="yards_gained", aggfunc="mean"),
    epa=pd.NamedAgg(column="epa", aggfunc="mean"),
    # success rate expressed as a percentage
    success_rate=pd.NamedAgg(column="success", aggfunc=lambda s: s.mean() * 100),
)
box_analysis = box_analysis.reset_index()

# Keep only box counts with a meaningful sample.
enough_attempts = box_analysis["attempts"] >= 100
box_analysis = box_analysis[enough_attempts]

print("Rushing by Defenders in Box:")
print(box_analysis)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Load the 2023 play-by-play data.
pbp <- load_pbp(2023)

# Goal line rushing (inside 5 yard line)
# Two-point conversion tries are snapped from the 2-yard line and are
# recorded as run/pass plays, but they can never produce a rushing
# touchdown. Leaving them in would inflate attempts and deflate the TD
# rate, so they are excluded.
goal_line <- pbp %>%
  filter(
    play_type == "run",
    yardline_100 <= 5,
    two_point_attempt == 0,
    !is.na(epa)
  )

# Overall goal line rushing
# Left unassigned so the one-row summary prints when the script runs.
goal_line %>%
  summarize(
    attempts = n(),
    td_rate = mean(rush_touchdown) * 100,
    success_rate = mean(success) * 100,
    avg_yards = mean(yards_gained)
  )
# Team goal line rushing
# Per offense: attempts, rushing touchdowns, TD rate (%) and success
# rate (%) on goal-line runs; teams with fewer than 10 attempts are
# dropped and the rest are ordered by TD rate.
team_goal_line <- goal_line %>%
  group_by(posteam) %>%
  summarise(
    attempts = n(),
    touchdowns = sum(rush_touchdown),
    td_rate = 100 * mean(rush_touchdown),
    success_rate = 100 * mean(success)
  ) %>%
  filter(attempts >= 10) %>%
  arrange(desc(td_rate))

print(team_goal_line)
import nfl_data_py as nfl
import pandas as pd
# Load the 2023 play-by-play data.
pbp = nfl.import_pbp_data([2023])

# Goal line rushing
# Two-point conversion tries are snapped from the 2-yard line and are
# recorded as run/pass plays, but they can never produce a rushing
# touchdown. Leaving them in would inflate attempts and deflate the TD
# rate, so they are excluded.
goal_line = pbp[(pbp["play_type"] == "run") &
                (pbp["yardline_100"] <= 5) &
                (pbp["two_point_attempt"] == 0) &
                (pbp["epa"].notna())]

# Team goal line rushing
team_goal_line = (goal_line.groupby("posteam")
    .agg(
        attempts=("rush_touchdown", "count"),
        touchdowns=("rush_touchdown", "sum"),
        td_rate=("rush_touchdown", lambda x: x.mean() * 100),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index())

# Require at least 10 attempts, then rank by touchdown rate.
team_goal_line = team_goal_line[team_goal_line["attempts"] >= 10].sort_values(
    "td_rate", ascending=False)

print("Goal Line Rushing Efficiency:")
print(team_goal_line)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Load the 2023 play-by-play data.
pbp <- load_pbp(2023)

# RB rushing stats
# Per rusher id: attempts, total yards and total EPA on run plays.
rb_rush <- pbp %>%
  filter(play_type == "run") %>%
  filter(!is.na(rusher_player_id)) %>%
  group_by(rusher_player_id) %>%
  summarise(
    rush_att = n(),
    rush_yards = sum(yards_gained),
    rush_epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )
# RB receiving stats
# Per targeted receiver id: targets, receptions, yards on completed
# passes and total EPA on pass plays.
rb_rec <- pbp %>%
  filter(play_type == "pass") %>%
  filter(!is.na(receiver_player_id)) %>%
  group_by(receiver_player_id) %>%
  summarise(
    targets = n(),
    receptions = sum(complete_pass),
    # only completed passes contribute receiving yards
    rec_yards = sum(yards_gained[complete_pass == 1], na.rm = TRUE),
    rec_epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )
# Combine (simple join by player ID)
# Players with at least 50 rush attempts who also appear in the receiving
# table, with combined EPA and the receiving share of it (in %).
rb_combined <- rb_rush %>%
  filter(rush_att >= 50) %>%
  inner_join(rb_rec, by = c("rusher_player_id" = "receiver_player_id")) %>%
  mutate(
    total_epa = rush_epa + rec_epa,
    rec_epa_pct = rec_epa / total_epa * 100
  ) %>%
  arrange(desc(total_epa))

print(rb_combined)
import nfl_data_py as nfl
import pandas as pd
# Load the 2023 play-by-play data.
pbp = nfl.import_pbp_data([2023])

# RB rushing stats
# Run plays with an identified rusher, aggregated per rusher id.
rushes = pbp.loc[pbp["rusher_player_id"].notna() & pbp["play_type"].eq("run")]
rb_rush = rushes.groupby("rusher_player_id").agg(
    rush_att=pd.NamedAgg(column="epa", aggfunc="count"),
    rush_yards=pd.NamedAgg(column="yards_gained", aggfunc="sum"),
    rush_epa=pd.NamedAgg(column="epa", aggfunc="sum"),
)
rb_rush = rb_rush.reset_index()

# RB receiving stats
# Pass plays with an identified target, aggregated per receiver id.
receptions = pbp.loc[pbp["receiver_player_id"].notna() & pbp["play_type"].eq("pass")]
rb_rec = receptions.groupby("receiver_player_id").agg(
    targets=pd.NamedAgg(column="epa", aggfunc="count"),
    receptions=pd.NamedAgg(column="complete_pass", aggfunc="sum"),
    rec_epa=pd.NamedAgg(column="epa", aggfunc="sum"),
)
rb_rec = rb_rec.reset_index()
# Combine
# Inner join keeps only players present in both the rushing and the
# receiving tables.
rb_combined = rb_rush.merge(rb_rec, left_on="rusher_player_id",
                            right_on="receiver_player_id", how="inner")

# .copy() makes the filtered frame independent, so adding total_epa below
# does not raise pandas' SettingWithCopyWarning.
rb_combined = rb_combined[rb_combined["rush_att"] >= 50].copy()
rb_combined["total_epa"] = rb_combined["rush_epa"] + rb_combined["rec_epa"]

print("RB Total EPA (Rush + Receiving):")
print(rb_combined.sort_values("total_epa", ascending=False).head(20))
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Load the 2023 play-by-play data.
pbp <- load_pbp(2023)

# offense_formation is a participation field, which nflverse publishes
# separately from the play-by-play file. Attach it when the loaded data
# does not already carry it, so the filters below do not fail with
# "object 'offense_formation' not found". nflreadr is installed as a
# dependency of nflfastR, hence the namespaced call.
if (!"offense_formation" %in% names(pbp)) {
  pbp <- pbp %>%
    left_join(
      nflreadr::load_participation(2023) %>%
        select(game_id = nflverse_game_id, play_id, offense_formation),
      by = c("game_id", "play_id")
    )
}

# Rush success by formation
# Rushing efficiency per offensive formation, keeping formations with at
# least 100 run plays, best EPA first.
formation_rush <- pbp %>%
  filter(play_type == "run", !is.na(epa), !is.na(offense_formation)) %>%
  group_by(offense_formation) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100) %>%
  arrange(desc(epa))

print(formation_rush)
# Shotgun vs Under Center rushing
# Uses the play-by-play `shotgun` flag (1 = shotgun snap) rather than
# offense_formation == "SHOTGUN". The formation comparison produced an
# extra NA group for plays without a formation and filed every other
# formation label (e.g. pistol or empty sets) under "Under Center".
# The output column is still called `shotgun`.
pbp %>%
  filter(play_type == "run", !is.na(epa), !is.na(shotgun)) %>%
  mutate(
    shotgun = if_else(shotgun == 1, "Shotgun", "Under Center")
  ) %>%
  group_by(shotgun) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  )
import nfl_data_py as nfl
import pandas as pd
# Load the 2023 play-by-play data.
pbp = nfl.import_pbp_data([2023])

# Rush success by formation
# Run plays that have both an EPA value and a recorded formation.
rushes = pbp.loc[
    pbp["play_type"].eq("run")
    & pbp["epa"].notna()
    & pbp["offense_formation"].notna()
]

formation_rush = rushes.groupby("offense_formation").agg(
    attempts=pd.NamedAgg(column="epa", aggfunc="count"),
    avg_yards=pd.NamedAgg(column="yards_gained", aggfunc="mean"),
    epa=pd.NamedAgg(column="epa", aggfunc="mean"),
    # success rate expressed as a percentage
    success_rate=pd.NamedAgg(column="success", aggfunc=lambda s: s.mean() * 100),
)
formation_rush = formation_rush.reset_index()

# Keep formations with at least 100 attempts, best EPA first.
formation_rush = formation_rush[formation_rush["attempts"] >= 100]
formation_rush = formation_rush.sort_values(by="epa", ascending=False)

print("Rushing Efficiency by Formation:")
print(formation_rush)
nflfastR
tidyverse
nfl_data_py
pandas
nflfastR - Play-by-play data with EPA
nflplotR - NFL team logos & plotting
tidyverse - Data manipulation & visualization
ggplot2 - Advanced visualizations
nfl_data_py - NFL data (nflverse compatible)
pandas - Data manipulation
matplotlib - Visualizations
scikit-learn - Machine learning
Learn the theory behind these techniques in our comprehensive tutorial series.
Browse Tutorials