NFL Analytics Code Examples - R & Python Scripts

Rushing Analysis

Running back and rushing game analytics

Yards Before Contact

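Evaluate the run game (a rough proxy for offensive line blocking) using team and rusher EPA, yards per carry, and success rate. Note: the examples below do not compute yards before contact.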

Intermediate

library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data.
pbp <- load_pbp(2023)

# Team rushing analysis ----
# One row per offense (posteam): rush volume, yards per carry, EPA per rush
# and success rate as a percentage, sorted by EPA per rush (best first).
# Plays without EPA are removed up front so all metrics share one play set.
team_rushing <- pbp %>%
  filter(play_type == "run", !is.na(epa)) %>%
  group_by(posteam) %>%
  summarize(
    rush_attempts = n(),
    avg_yards = mean(yards_gained),
    rush_epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(rush_epa))

print(team_rushing)

# RB efficiency ----
# One row per rusher with at least 100 attempts, sorted by EPA per rush.
# Plays with a missing EPA are kept here (they still count as attempts and
# yards), so every mean over a model output must skip NAs: without
# na.rm = TRUE a single missing `success` value turns the rusher's
# success_rate into NA.
# NOTE(review): `success` is presumably missing wherever `epa` is - confirm.
rb_efficiency <- pbp %>%
  filter(!is.na(rusher_player_id), play_type == "run") %>%
  group_by(rusher_player_id, rusher_player_name) %>%
  summarize(
    attempts = n(),
    yards = sum(yards_gained),
    ypc = mean(yards_gained),
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100) %>%
  arrange(desc(epa))

print(rb_efficiency)

import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])


def to_percent(values):
    """Return the mean of ``values`` scaled to a percentage."""
    return values.mean() * 100


# Team rushing analysis: keep run plays that have an EPA value
is_run = pbp["play_type"].eq("run")
has_epa = pbp["epa"].notna()
rushes = pbp.loc[is_run & has_epa]

# One row per offense, best EPA per rush first
per_team = rushes.groupby("posteam").agg(
    rush_attempts=("epa", "count"),
    avg_yards=("yards_gained", "mean"),
    rush_epa=("epa", "mean"),
    success_rate=("success", to_percent),
)
team_rushing = per_team.reset_index()
team_rushing = team_rushing.sort_values("rush_epa", ascending=False)

print("Team Rushing Rankings:")
print(team_rushing)

Packages: nflfastR tidyverse nfl_data_py pandas

Run Direction Efficiency

Compare rushing efficiency by run gap and direction.

Intermediate

library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data.
pbp <- load_pbp(2023)

# Run direction analysis ----
# Efficiency for every run_location / run_gap combination, best EPA first.
# Runs up the middle carry no gap in the play-by-play data (run_gap is only
# recorded for left/right runs), so requiring a non-missing gap would silently
# drop every middle run. They are labelled with a "middle" gap instead; left
# and right runs with an unknown gap are still excluded.
run_direction <- pbp %>%
  filter(play_type == "run", !is.na(run_location)) %>%
  mutate(
    run_gap = if_else(
      run_location == "middle" & is.na(run_gap),
      "middle",
      run_gap
    )
  ) %>%
  filter(!is.na(run_gap)) %>%
  group_by(run_location, run_gap) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    # Plays with a missing EPA are kept as attempts, so both model-based
    # means skip NAs; otherwise one missing value makes the whole cell NA.
    epa = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(epa))

print(run_direction)

# Team run direction tendencies ----
# One row per offense with attempt counts and percentage shares for each
# run_location, spread into columns (attempts_<location>, pct_<location>).
team_direction <- pbp %>%
  filter(play_type == "run", !is.na(run_location)) %>%
  group_by(posteam, run_location) %>%
  summarize(attempts = n(), .groups = "drop") %>%
  group_by(posteam) %>%
  mutate(pct = attempts / sum(attempts) * 100) %>%
  # Drop the grouping before reshaping so the result is a plain tibble.
  ungroup() %>%
  pivot_wider(names_from = run_location, values_from = c(attempts, pct))

print(team_direction)

import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Run direction analysis
# Start from every run with a known location; copy so the gap relabelling
# below does not write into a slice of ``pbp``.
rushes = pbp[(pbp["play_type"] == "run") &
             (pbp["run_location"].notna())].copy()

# Runs up the middle carry no gap in the play-by-play data (run_gap is only
# recorded for left/right runs), so requiring a non-null gap would silently
# drop every middle run. Label them "middle" instead; left/right runs with an
# unknown gap are still excluded.
up_the_middle = (rushes["run_location"] == "middle") & rushes["run_gap"].isna()
rushes["run_gap"] = rushes["run_gap"].astype(object).mask(up_the_middle, "middle")
rushes = rushes[rushes["run_gap"].notna()]

run_direction = (rushes.groupby(["run_location", "run_gap"])
    .agg(
        # "size" counts every rush; "count" would skip plays with a missing
        # EPA even though avg_yards is computed over all of them.
        attempts=("epa", "size"),
        avg_yards=("yards_gained", "mean"),
        epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index()
    .sort_values("epa", ascending=False))

print("Run Efficiency by Direction:")
print(run_direction)

Packages: nflfastR tidyverse nfl_data_py pandas

Box Count Impact

Analyze how defenders in the box affect rushing success.

Advanced

library(nflfastR)
library(tidyverse)

# defenders_in_box is participation (charting) data and is not part of the
# load_pbp() output, so filtering on it there fails with an unknown-column
# error. Load participation with the play-by-play columns joined on instead.
# nflreadr is the package nflfastR itself depends on for its load_*()
# functions, so no extra installation is needed.
pbp <- nflreadr::load_participation(2023, include_pbp = TRUE)

# Box count analysis ----
# Rushing efficiency for each defenders-in-box count that has at least 100
# attempts. Plays without EPA are removed so every metric uses the same plays.
box_analysis <- pbp %>%
  filter(play_type == "run", !is.na(defenders_in_box), !is.na(epa)) %>%
  group_by(defenders_in_box) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100)

print(box_analysis)

# Team success against stacked boxes (8+) ----
# A missing defenders_in_box makes the comparison NA, and filter() drops
# those rows, so no separate is.na() check is needed.
stacked_box <- pbp %>%
  filter(play_type == "run", defenders_in_box >= 8, !is.na(epa)) %>%
  group_by(posteam) %>%
  summarize(
    stacked_attempts = n(),
    stacked_epa = mean(epa),
    stacked_success = mean(success) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(stacked_epa))

print(stacked_box)

import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])


def to_percent(values):
    """Return the mean of ``values`` scaled to a percentage."""
    return values.mean() * 100


# Box count analysis: run plays with a known box count and an EPA value
keep = (
    pbp["play_type"].eq("run")
    & pbp["defenders_in_box"].notna()
    & pbp["epa"].notna()
)
rushes = pbp.loc[keep]

by_box = rushes.groupby("defenders_in_box").agg(
    attempts=("epa", "count"),
    avg_yards=("yards_gained", "mean"),
    epa=("epa", "mean"),
    success_rate=("success", to_percent),
)
box_analysis = by_box.reset_index()

# Only report box counts seen on at least 100 attempts
enough_attempts = box_analysis["attempts"] >= 100
box_analysis = box_analysis.loc[enough_attempts]
print("Rushing by Defenders in Box:")
print(box_analysis)

Packages: nflfastR tidyverse nfl_data_py pandas

Goal Line Rushing

Analyze rushing efficiency near the goal line (runs starting at or inside the 5-yard line).

Intermediate

library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data.
pbp <- load_pbp(2023)

# Goal line rushing (inside 5 yard line) ----
# Two-point tries are also run plays snapped inside the 5, but they can never
# be rushing touchdowns, so leaving them in deflates td_rate. They have no
# down, which is what the !is.na(down) condition uses to exclude them.
goal_line <- pbp %>%
  filter(
    play_type == "run",
    yardline_100 <= 5,
    !is.na(down),
    !is.na(epa)
  )

# Overall goal line rushing ----
# Printed explicitly, like every other result in this script, so the summary
# also appears when the file is run with source().
goal_line %>%
  summarize(
    attempts = n(),
    td_rate = mean(rush_touchdown) * 100,
    success_rate = mean(success) * 100,
    avg_yards = mean(yards_gained)
  ) %>%
  print()

# Team goal line rushing ----
# One row per offense with at least 10 goal line rushes, best TD rate first.
team_goal_line <- goal_line %>%
  group_by(posteam) %>%
  summarize(
    attempts = n(),
    touchdowns = sum(rush_touchdown),
    td_rate = mean(rush_touchdown) * 100,
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 10) %>%
  arrange(desc(td_rate))

print(team_goal_line)

import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Goal line rushing (at or inside the 5 yard line)
# Two-point tries are also run plays snapped inside the 5, but they can never
# be rushing touchdowns, so leaving them in deflates td_rate. They have no
# down, which is what the ``down`` condition uses to exclude them.
goal_line = pbp[(pbp["play_type"] == "run") &
                (pbp["yardline_100"] <= 5) &
                (pbp["down"].notna()) &
                (pbp["epa"].notna())]

# Team goal line rushing: one row per offense
team_goal_line = (goal_line.groupby("posteam")
    .agg(
        attempts=("rush_touchdown", "count"),
        touchdowns=("rush_touchdown", "sum"),
        td_rate=("rush_touchdown", lambda x: x.mean() * 100),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index())

# Require at least 10 attempts, then rank by touchdown rate
team_goal_line = team_goal_line[team_goal_line["attempts"] >= 10].sort_values(
    "td_rate", ascending=False)

print("Goal Line Rushing Efficiency:")
print(team_goal_line)

Packages: nflfastR tidyverse nfl_data_py pandas

RB Receiving vs Rushing Split

Compare running back value as rushers vs receivers.

Intermediate

library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data.
pbp <- load_pbp(2023)

# Rushing stats per ball carrier ----
# The player name is carried along so the final table is readable; the
# grouping key stays the player ID, which is what the join below uses.
rb_rush <- pbp %>%
  filter(!is.na(rusher_player_id), play_type == "run") %>%
  group_by(rusher_player_id) %>%
  summarize(
    player_name = first(rusher_player_name),
    rush_att = n(),
    rush_yards = sum(yards_gained),
    rush_epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )

# Receiving stats per targeted player ----
# Yards are summed over completed passes only.
rb_rec <- pbp %>%
  filter(!is.na(receiver_player_id), play_type == "pass") %>%
  group_by(receiver_player_id) %>%
  summarize(
    targets = n(),
    receptions = sum(complete_pass),
    rec_yards = sum(yards_gained[complete_pass == 1], na.rm = TRUE),
    rec_epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )

# Combine (simple join by player ID) ----
# NOTE(review): nothing here restricts the result to running backs; any
# player with 50+ rushes and at least one target is included. Join roster
# positions if a strict RB-only table is needed.
# rec_epa_pct is only meaningful when total_epa is clearly positive; totals
# near zero or below produce extreme or negative shares.
rb_combined <- rb_rush %>%
  inner_join(rb_rec, by = c("rusher_player_id" = "receiver_player_id")) %>%
  filter(rush_att >= 50) %>%
  mutate(
    total_epa = rush_epa + rec_epa,
    rec_epa_pct = rec_epa / total_epa * 100
  ) %>%
  arrange(desc(total_epa))

print(rb_combined)

import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Rushing stats per ball carrier
rushes = pbp[(pbp["rusher_player_id"].notna()) & (pbp["play_type"] == "run")]
rb_rush = (rushes.groupby("rusher_player_id")
    .agg(
        # Carry the name along so the printed table is readable.
        player_name=("rusher_player_name", "first"),
        # "size" counts every rush; "count" would skip plays whose EPA is
        # missing and understate attempts against the 50-rush cutoff.
        rush_att=("epa", "size"),
        rush_yards=("yards_gained", "sum"),
        rush_epa=("epa", "sum")
    )
    .reset_index())

# Receiving stats per targeted player
receptions = pbp[(pbp["receiver_player_id"].notna()) & (pbp["play_type"] == "pass")]
rb_rec = (receptions.groupby("receiver_player_id")
    .agg(
        targets=("epa", "size"),
        receptions=("complete_pass", "sum"),
        rec_epa=("epa", "sum")
    )
    .reset_index())

# Combine
# NOTE(review): nothing here restricts the result to running backs; any
# player with 50+ rushes and at least one target is included.
rb_combined = rb_rush.merge(rb_rec, left_on="rusher_player_id",
                             right_on="receiver_player_id", how="inner")
# Copy the filtered rows so adding a column below writes to an independent
# frame instead of a slice (avoids pandas' SettingWithCopyWarning).
rb_combined = rb_combined[rb_combined["rush_att"] >= 50].copy()
rb_combined["total_epa"] = rb_combined["rush_epa"] + rb_combined["rec_epa"]

print("RB Total EPA (Rush + Receiving):")
print(rb_combined.sort_values("total_epa", ascending=False).head(20))

Packages: nflfastR tidyverse nfl_data_py pandas

Rush Success by Formation

Analyze rushing efficiency from different offensive formations.

Advanced

library(nflfastR)
library(tidyverse)

# offense_formation is participation (charting) data and is not part of the
# load_pbp() output, so filtering on it there fails with an unknown-column
# error. Load participation with the play-by-play columns joined on instead.
# nflreadr is the package nflfastR itself depends on for its load_*()
# functions, so no extra installation is needed.
pbp <- nflreadr::load_participation(2023, include_pbp = TRUE)

# Rush success by formation ----
# One row per charted formation with at least 100 rushes, best EPA first.
formation_rush <- pbp %>%
  filter(play_type == "run", !is.na(epa), !is.na(offense_formation)) %>%
  group_by(offense_formation) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 100) %>%
  arrange(desc(epa))

print(formation_rush)

# Shotgun vs Under Center rushing ----
# Split on the play-by-play `shotgun` flag rather than on
# offense_formation == "SHOTGUN": the formation test lumped every other
# charted formation into "Under Center" and turned plays with no charted
# formation into an NA group.
# NOTE(review): "Under Center" here means "not flagged as shotgun" - confirm
# how pistol snaps are flagged if that distinction matters.
pbp %>%
  filter(play_type == "run", !is.na(epa), !is.na(shotgun)) %>%
  mutate(
    shotgun = if_else(shotgun == 1, "Shotgun", "Under Center")
  ) %>%
  group_by(shotgun) %>%
  summarize(
    attempts = n(),
    avg_yards = mean(yards_gained),
    epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  ) %>%
  # Printed explicitly so the table also appears under source().
  print()

import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])


def to_percent(values):
    """Return the mean of ``values`` scaled to a percentage."""
    return values.mean() * 100


# Rush success by formation: run plays with an EPA value and a formation
keep = (
    pbp["play_type"].eq("run")
    & pbp["epa"].notna()
    & pbp["offense_formation"].notna()
)
rushes = pbp.loc[keep]

by_formation = rushes.groupby("offense_formation").agg(
    attempts=("epa", "count"),
    avg_yards=("yards_gained", "mean"),
    epa=("epa", "mean"),
    success_rate=("success", to_percent),
)
formation_rush = by_formation.reset_index()

# Keep formations with at least 100 rushes, best EPA first
enough_attempts = formation_rush["attempts"] >= 100
formation_rush = formation_rush.loc[enough_attempts]
formation_rush = formation_rush.sort_values("epa", ascending=False)

print("Rushing Efficiency by Formation:")
print(formation_rush)

Packages: nflfastR tidyverse nfl_data_py pandas

Code Examples

Rushing Analysis

Yards Before Contact

Run Direction Efficiency

Box Count Impact

Goal Line Rushing

RB Receiving vs Rushing Split

Rush Success by Formation

Quick Package Reference

R Packages

Python Packages

Ready to Dive Deeper?