Code Examples

Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.

122 Examples
R & Python Support: All examples include both R and Python versions. Click the tabs to switch between languages. Use the copy button to copy code to clipboard.

Rushing Analysis

Running back and rushing game analytics

Yards Before Contact
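Approximate offensive line blocking with team- and rusher-level rushing efficiency (EPA per rush, success rate, yards per carry); the examples below do not compute yards before contact directly.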
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Team rushing analysis ----
# One row per offense: number of rushes, yards per rush, EPA per rush and
# success rate (as a percentage), ranked by EPA per rush. Run plays without
# an EPA value are dropped before aggregating.
team_rushing <- pbp %>%
  filter(play_type == "run", !is.na(epa)) %>%
  group_by(posteam) %>%
  summarize(
    rush_attempts = n(),
    avg_yards = mean(yards_gained),
    rush_epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(rush_epa))

print(team_rushing)

# RB efficiency ----
# One row per ball carrier with at least 100 attempts, ranked by EPA per rush.
# The grouping is by rusher, not by position, so any player who reaches the
# attempt threshold appears in the output.
#
# This pipeline does not filter on EPA, so `epa` and `success` are both
# averaged with na.rm = TRUE; without it, a single missing value would turn
# that player's rate into NA.
rb_efficiency <- pbp %>%
  filter(!is.na(rusher_player_id), play_type == "run") %>%
  group_by(rusher_player_id, rusher_player_name) %>%
  summarize(
    attempts = n(),
    yards = sum(yards_gained),
    ypc = mean(yards_gained),
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100) %>%
  arrange(desc(epa))

print(rb_efficiency)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])


def as_percent(flags):
    """Return the mean of a column scaled to a percentage."""
    return flags.mean() * 100


# Team rushing analysis: run plays that have a non-missing EPA value.
is_run = pbp["play_type"] == "run"
has_epa = pbp["epa"].notna()
rushes = pbp.loc[is_run & has_epa]

# One row per offense, best EPA per rush first.
per_team = rushes.groupby("posteam").agg(
    rush_attempts=("epa", "count"),
    avg_yards=("yards_gained", "mean"),
    rush_epa=("epa", "mean"),
    success_rate=("success", as_percent),
)
team_rushing = per_team.reset_index().sort_values("rush_epa", ascending=False)

print("Team Rushing Rankings:")
print(team_rushing)
Packages: nflfastR tidyverse nfl_data_py pandas
Run Direction Efficiency
Compare rushing efficiency by run gap and direction.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Run direction analysis ----
# Efficiency for every run_location x run_gap combination, best EPA first.
#
# Runs up the middle carry no run_gap value, so filtering on
# !is.na(run_gap) alone would silently drop all of them. They are labelled
# "middle" here so they stay in the comparison; rows that still have no gap
# after that (left/right runs with an unrecorded gap) are excluded.
run_direction <- pbp %>%
  filter(play_type == "run", !is.na(run_location)) %>%
  mutate(
    run_gap = if_else(
      run_location == "middle" & is.na(run_gap),
      "middle",
      run_gap
    )
  ) %>%
  filter(!is.na(run_gap)) %>%
  group_by(run_location, run_gap) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    # No EPA filter above, so both EPA-derived columns skip missing values.
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(epa))

print(run_direction)

# Team run direction tendencies ----
# One row per offense with attempt counts and percentage shares for each
# run_location, spread into columns (attempts_<location>, pct_<location>).
team_direction <- pbp %>%
  filter(play_type == "run", !is.na(run_location)) %>%
  group_by(posteam, run_location) %>%
  summarize(attempts = n(), .groups = "drop") %>%
  group_by(posteam) %>%
  mutate(pct = attempts / sum(attempts) * 100) %>%
  # Drop the grouping before reshaping so the result is a plain tibble.
  ungroup() %>%
  pivot_wider(names_from = run_location, values_from = c(attempts, pct))

print(team_direction)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Run direction analysis
# Start from every run with a recorded location. Runs up the middle carry no
# run_gap value, so requiring a non-null gap up front would silently drop all
# of them; label them "middle" instead so they stay in the comparison.
rushes = pbp[(pbp["play_type"] == "run") &
             (pbp["run_location"].notna())].copy()

middle_without_gap = (rushes["run_location"] == "middle") & rushes["run_gap"].isna()
rushes.loc[middle_without_gap, "run_gap"] = "middle"

# Left/right runs whose gap was not recorded are still excluded.
rushes = rushes[rushes["run_gap"].notna()]

run_direction = (rushes.groupby(["run_location", "run_gap"])
    .agg(
        # "size" counts plays; counting the EPA column would skip plays whose
        # EPA is missing and under-report attempts.
        attempts=("play_id", "size"),
        avg_yards=("yards_gained", "mean"),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index()
    .sort_values("epa", ascending=False))

print("Run Efficiency by Direction:")
print(run_direction)
Packages: nflfastR tidyverse nfl_data_py pandas
Box Count Impact
Analyze how defenders in the box affect rushing success.
Advanced
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# defenders_in_box is a participation-data field rather than a standard
# play-by-play column. When load_pbp() did not supply it, attach it from the
# participation table by game and play id. nflreadr is installed alongside
# nflfastR, so it is called with `::` instead of adding a library() line.
if (!"defenders_in_box" %in% names(pbp)) {
  box_counts <- nflreadr::load_participation(2023) %>%
    select(game_id = nflverse_game_id, play_id, defenders_in_box)

  pbp <- pbp %>%
    left_join(box_counts, by = c("game_id", "play_id"))
}

# Box count analysis ----
# Rushing efficiency for each number of defenders in the box; box counts seen
# on fewer than 100 runs are dropped.
box_analysis <- pbp %>%
  filter(play_type == "run", !is.na(defenders_in_box), !is.na(epa)) %>%
  group_by(defenders_in_box) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100)

print(box_analysis)

# Team success against stacked boxes (8+) ----
# One row per offense, best EPA per rush first. There is no minimum-attempt
# filter here, so some teams may be ranked on a small number of plays.
stacked_box <- pbp %>%
  filter(play_type == "run", defenders_in_box >= 8, !is.na(epa)) %>%
  group_by(posteam) %>%
  summarize(
    stacked_attempts = n(),
    stacked_epa = mean(epa),
    stacked_success = mean(success) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(stacked_epa))

print(stacked_box)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])


def as_percent(flags):
    """Return the mean of a column scaled to a percentage."""
    return flags.mean() * 100


# Box count analysis: run plays with a known box count and an EPA value.
keep = (
    pbp["play_type"].eq("run")
    & pbp["defenders_in_box"].notna()
    & pbp["epa"].notna()
)
rushes = pbp.loc[keep]

# One row per box count.
by_box = rushes.groupby("defenders_in_box").agg(
    attempts=("epa", "count"),
    avg_yards=("yards_gained", "mean"),
    epa=("epa", "mean"),
    success_rate=("success", as_percent),
)
box_analysis = by_box.reset_index()

# Keep only box counts seen on at least 100 runs.
box_analysis = box_analysis.loc[box_analysis["attempts"].ge(100)]
print("Rushing by Defenders in Box:")
print(box_analysis)
Packages: nflfastR tidyverse nfl_data_py pandas
Goal Line Rushing
Analyze rushing efficiency near the goal line (snaps from inside the opponent's 5-yard line).
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Goal line rushing (inside 5 yard line) ----
# Two-point tries are snapped from the 2-yard line and have no down, so the
# !is.na(down) condition keeps them from being mixed in with real goal-line
# carries and dragging the touchdown rate down.
goal_line <- pbp %>%
  filter(
    play_type == "run",
    yardline_100 <= 5,
    !is.na(down),
    !is.na(epa)
  )

# Overall goal line rushing ----
# League-wide baseline. Stored and printed explicitly so the result is also
# shown when the script is run with source() or Rscript.
goal_line_overall <- goal_line %>%
  summarize(
    attempts = n(),
    td_rate = mean(rush_touchdown) * 100,
    success_rate = mean(success) * 100,
    avg_yards = mean(yards_gained)
  )

print(goal_line_overall)

# Team goal line rushing ----
# One row per offense with at least 10 goal-line carries, best touchdown
# rate first.
team_goal_line <- goal_line %>%
  group_by(posteam) %>%
  summarize(
    attempts = n(),
    touchdowns = sum(rush_touchdown),
    td_rate = mean(rush_touchdown) * 100,
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 10) %>%
  arrange(desc(td_rate))

print(team_goal_line)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Goal line rushing (inside the 5-yard line)
# Two-point tries are snapped from the 2-yard line and have no down, so the
# down.notna() condition keeps them from being mixed in with real goal-line
# carries and dragging the touchdown rate down.
goal_line = pbp[(pbp["play_type"] == "run") &
                (pbp["yardline_100"] <= 5) &
                (pbp["down"].notna()) &
                (pbp["epa"].notna())]

# Team goal line rushing: one row per offense.
team_goal_line = (goal_line.groupby("posteam")
    .agg(
        attempts=("rush_touchdown", "count"),
        touchdowns=("rush_touchdown", "sum"),
        td_rate=("rush_touchdown", lambda x: x.mean() * 100),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index())

# Require at least 10 carries, then rank by touchdown rate.
team_goal_line = team_goal_line[team_goal_line["attempts"] >= 10].sort_values(
    "td_rate", ascending=False)

print("Goal Line Rushing Efficiency:")
print(team_goal_line)
Packages: nflfastR tidyverse nfl_data_py pandas
RB Receiving vs Rushing Split
Compare running back value as rushers vs receivers.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Rushing stats per ball carrier ----
# `player` keeps one display name per id so the final table is readable
# without looking ids up; grouping stays on the id alone so a player is never
# split across rows.
rb_rush <- pbp %>%
  filter(!is.na(rusher_player_id), play_type == "run") %>%
  group_by(rusher_player_id) %>%
  summarize(
    player = first(rusher_player_name),
    rush_att = n(),
    rush_yards = sum(yards_gained),
    rush_epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )

# Receiving stats per targeted player ----
# Every pass play with a recorded receiver counts as a target; yards are
# summed over completions only.
rb_rec <- pbp %>%
  filter(!is.na(receiver_player_id), play_type == "pass") %>%
  group_by(receiver_player_id) %>%
  summarize(
    targets = n(),
    receptions = sum(complete_pass),
    rec_yards = sum(yards_gained[complete_pass == 1], na.rm = TRUE),
    rec_epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )

# Combine (simple join by player ID) ----
# The inner join keeps only players who appear both as a rusher and as a
# targeted receiver; no position filter is applied, so anyone with 50+ rushes
# and at least one target is included.
rb_combined <- rb_rush %>%
  inner_join(rb_rec, by = c("rusher_player_id" = "receiver_player_id")) %>%
  filter(rush_att >= 50) %>%
  mutate(
    total_epa = rush_epa + rec_epa,
    # A share of a zero or negative total is not meaningful (it flips sign or
    # divides by zero), so it is reported as NA in that case.
    rec_epa_pct = if_else(total_epa > 0, rec_epa / total_epa * 100, NA_real_)
  ) %>%
  arrange(desc(total_epa))

print(rb_combined)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Rushing stats per ball carrier
# `player` keeps one display name per id so the final table is readable;
# "size" counts plays, whereas counting the EPA column would skip plays whose
# EPA is missing.
rushes = pbp[(pbp["rusher_player_id"].notna()) & (pbp["play_type"] == "run")]
rb_rush = (rushes.groupby("rusher_player_id")
    .agg(
        player=("rusher_player_name", "first"),
        rush_att=("play_id", "size"),
        rush_yards=("yards_gained", "sum"),
        rush_epa=("epa", "sum")
    )
    .reset_index())

# Receiving stats per targeted player
# Every pass play with a recorded receiver counts as a target; yards are
# credited on completions only.
targets_df = pbp[(pbp["receiver_player_id"].notna()) & (pbp["play_type"] == "pass")]
targets_df = targets_df.assign(
    caught_yards=targets_df["yards_gained"].where(targets_df["complete_pass"] == 1, 0)
)
rb_rec = (targets_df.groupby("receiver_player_id")
    .agg(
        targets=("play_id", "size"),
        receptions=("complete_pass", "sum"),
        rec_yards=("caught_yards", "sum"),
        rec_epa=("epa", "sum")
    )
    .reset_index())

# Combine
# The inner merge keeps only players who appear both as a rusher and as a
# targeted receiver; no position filter is applied.
rb_combined = rb_rush.merge(rb_rec, left_on="rusher_player_id",
                             right_on="receiver_player_id", how="inner")

# .copy() makes the filtered frame independent so the column assignments
# below do not trigger SettingWithCopyWarning.
rb_combined = rb_combined[rb_combined["rush_att"] >= 50].copy()
rb_combined["total_epa"] = rb_combined["rush_epa"] + rb_combined["rec_epa"]

# A share of a zero or negative total is not meaningful, so it is left as NaN
# in that case.
rb_combined["rec_epa_pct"] = (
    rb_combined["rec_epa"] / rb_combined["total_epa"] * 100
).where(rb_combined["total_epa"] > 0)

print("RB Total EPA (Rush + Receiving):")
print(rb_combined.sort_values("total_epa", ascending=False).head(20))
Packages: nflfastR tidyverse nfl_data_py pandas
Rush Success by Formation
Analyze rushing efficiency from different offensive formations.
Advanced
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# offense_formation is a participation-data field rather than a standard
# play-by-play column. When load_pbp() did not supply it, attach it from the
# participation table by game and play id. nflreadr is installed alongside
# nflfastR, so it is called with `::` instead of adding a library() line.
if (!"offense_formation" %in% names(pbp)) {
  formations <- nflreadr::load_participation(2023) %>%
    select(game_id = nflverse_game_id, play_id, offense_formation)

  pbp <- pbp %>%
    left_join(formations, by = c("game_id", "play_id"))
}

# Rush success by formation ----
# One row per offensive formation with at least 100 runs, best EPA first.
formation_rush <- pbp %>%
  filter(play_type == "run", !is.na(epa), !is.na(offense_formation)) %>%
  group_by(offense_formation) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100) %>%
  arrange(desc(epa))

print(formation_rush)

# Shotgun vs Under Center rushing ----
# Uses the play-by-play `shotgun` 0/1 flag instead of comparing
# offense_formation to "SHOTGUN": the string comparison labelled every other
# formation "Under Center" and produced an extra NA group for plays with no
# formation recorded.
# NOTE(review): confirm whether pistol snaps are flagged as shotgun; if not,
# they are counted under "Under Center" here.
shotgun_split <- pbp %>%
  filter(play_type == "run", !is.na(epa)) %>%
  mutate(
    shotgun = if_else(shotgun == 1, "Shotgun", "Under Center")
  ) %>%
  group_by(shotgun) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  )

# Printed explicitly so the table also appears under source() or Rscript.
print(shotgun_split)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])


def as_percent(flags):
    """Return the mean of a column scaled to a percentage."""
    return flags.mean() * 100


# Rush success by formation: run plays with an EPA value and a formation.
keep = (
    pbp["play_type"].eq("run")
    & pbp["epa"].notna()
    & pbp["offense_formation"].notna()
)
rushes = pbp.loc[keep]

# One row per offensive formation.
by_formation = rushes.groupby("offense_formation").agg(
    attempts=("epa", "count"),
    avg_yards=("yards_gained", "mean"),
    epa=("epa", "mean"),
    success_rate=("success", as_percent),
)
formation_rush = by_formation.reset_index()

# Keep formations with at least 100 runs, best EPA per rush first.
enough_volume = formation_rush["attempts"].ge(100)
formation_rush = formation_rush.loc[enough_volume].sort_values(
    "epa", ascending=False)

print("Rushing Efficiency by Formation:")
print(formation_rush)
Packages: nflfastR tidyverse nfl_data_py pandas
Quick Package Reference
R Packages
  • nflfastR - Play-by-play data with EPA
  • nflplotR - NFL team logos & plotting
  • tidyverse - Data manipulation & visualization
  • ggplot2 - Advanced visualizations
Python Packages
  • nfl_data_py - NFL data (nflverse compatible)
  • pandas - Data manipulation
  • matplotlib - Visualizations
  • scikit-learn - Machine learning

Ready to Dive Deeper?

Learn the theory behind these techniques in our comprehensive tutorial series

Browse Tutorials