Code Examples

Copy-paste-ready R and Python code for NFL analytics, from data loading to machine learning models.

122 Examples
R & Python Support: All examples include both R and Python versions. Click the tabs to switch between languages. Use the copy button to copy code to clipboard.

Passing Analysis

Deep dive into quarterback and passing game analytics

Air Yards Analysis
Analyze depth of target and air yards distribution.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Throws that have both an identified passer and a recorded air-yards value
targeted_throws <- pbp %>%
  filter(!is.na(passer_player_id), !is.na(air_yards))

# Per-QB depth-of-target profile: volume, average depth, and the share of
# throws that are deep (20+ air yards) or short (under 5 air yards).
# Only passers with at least 200 qualifying attempts are kept.
qb_air_yards <- targeted_throws %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarise(
    attempts = n(),
    total_air_yards = sum(air_yards),
    avg_air_yards = mean(air_yards),
    deep_pct = 100 * mean(air_yards >= 20),
    short_pct = 100 * mean(air_yards < 5),
    .groups = "drop"
  ) %>%
  filter(attempts >= 200) %>%
  arrange(desc(avg_air_yards))

print(qb_air_yards)

# Average air yards on completed throws versus all other throws
pbp %>%
  filter(!is.na(air_yards)) %>%
  mutate(result = if_else(complete_pass == 1, "Complete", "Incomplete")) %>%
  group_by(result) %>%
  summarise(avg_air_yards = mean(air_yards))
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# QB air yards analysis: keep plays with an identified passer and a recorded
# air-yards value
passes = pbp[(pbp["passer_player_id"].notna()) & (pbp["air_yards"].notna())]

qb_air_yards = (passes.groupby(["passer_player_id", "passer_player_name"])
    .agg(
        attempts=("air_yards", "count"),
        total_air_yards=("air_yards", "sum"),
        avg_air_yards=("air_yards", "mean"),
        # Share of throws travelling 20+ air yards
        deep_pct=("air_yards", lambda x: (x >= 20).mean() * 100),
        # Share of throws under 5 air yards (reported by the R version too)
        short_pct=("air_yards", lambda x: (x < 5).mean() * 100)
    )
    .reset_index())

# Keep passers with at least 200 qualifying attempts, deepest throwers first
qb_air_yards = qb_air_yards[qb_air_yards["attempts"] >= 200].sort_values(
    "avg_air_yards", ascending=False)

print("QB Air Yards Analysis:")
print(qb_air_yards)
Packages: nflfastR tidyverse nfl_data_py pandas
Completion Percentage Over Expected (CPOE)
Analyze quarterback accuracy using CPOE metric.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Throws that have both an identified passer and a CPOE value
cpoe_throws <- pbp %>%
  filter(!is.na(passer_player_id), !is.na(cpoe))

# Per-QB accuracy: actual vs. expected completion percentage and mean CPOE,
# limited to passers with at least 200 qualifying attempts
qb_cpoe <- cpoe_throws %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarise(
    attempts = n(),
    completion_pct = 100 * mean(complete_pass),
    exp_completion_pct = 100 * mean(cp),
    cpoe = mean(cpoe),
    .groups = "drop"
  ) %>%
  filter(attempts >= 200) %>%
  arrange(desc(cpoe))

print(qb_cpoe)

# CPOE split by how far downfield the ball was thrown.
# air_yards is never NA here, so testing the bands from deepest to
# shallowest assigns the same label as testing shallowest to deepest.
pbp %>%
  filter(!is.na(cpoe), !is.na(air_yards)) %>%
  mutate(
    depth = case_when(
      air_yards >= 20 ~ "Deep (20+)",
      air_yards >= 10 ~ "Medium (10-19)",
      air_yards >= 0 ~ "Short (0-9)",
      TRUE ~ "Behind LOS"
    )
  ) %>%
  group_by(depth) %>%
  summarise(
    attempts = n(),
    avg_cpoe = mean(cpoe),
    completion_rate = mean(complete_pass)
  )
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# CPOE analysis: keep plays with an identified passer and a CPOE value
passes = pbp[(pbp["passer_player_id"].notna()) & (pbp["cpoe"].notna())]

qb_cpoe = (passes.groupby(["passer_player_id", "passer_player_name"])
    .agg(
        attempts=("cpoe", "count"),
        completion_pct=("complete_pass", lambda x: x.mean() * 100),
        # Expected completion percentage from the completion-probability
        # column, so actual vs. expected can be compared as in the R version
        exp_completion_pct=("cp", lambda x: x.mean() * 100),
        cpoe=("cpoe", "mean")
    )
    .reset_index())

# Keep passers with at least 200 qualifying attempts, best CPOE first
qb_cpoe = qb_cpoe[qb_cpoe["attempts"] >= 200].sort_values("cpoe", ascending=False)

print("QB CPOE Rankings:")
print(qb_cpoe)
Packages: nflfastR tidyverse nfl_data_py pandas
Depth of Target Analysis
Break down passing efficiency by depth zones.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Depth zones listed from shallowest to deepest. Using these as factor levels
# makes the summary print in field order instead of alphabetical order.
zone_levels <- c(
  "Behind LOS",
  "Short (0-4)",
  "Intermediate (5-9)",
  "Medium (10-14)",
  "Deep (15-19)",
  "Bomb (20+)"
)

# Passing efficiency by depth zone
depth_analysis <- pbp %>%
  filter(!is.na(air_yards), play_type == "pass") %>%
  mutate(
    depth_zone = factor(
      case_when(
        air_yards < 0 ~ "Behind LOS",
        air_yards < 5 ~ "Short (0-4)",
        air_yards < 10 ~ "Intermediate (5-9)",
        air_yards < 15 ~ "Medium (10-14)",
        air_yards < 20 ~ "Deep (15-19)",
        TRUE ~ "Bomb (20+)"
      ),
      levels = zone_levels
    )
  ) %>%
  group_by(depth_zone) %>%
  summarize(
    attempts = n(),
    completions = sum(complete_pass),
    comp_pct = mean(complete_pass) * 100,
    avg_epa = mean(epa, na.rm = TRUE),
    td_rate = mean(pass_touchdown, na.rm = TRUE) * 100,
    int_rate = mean(interception) * 100,
    .groups = "drop"
  )

print(depth_analysis)

# Team deep passing rankings (throws of 20+ air yards with a valid EPA)
pbp %>%
  filter(air_yards >= 20, !is.na(epa)) %>%
  group_by(posteam) %>%
  summarize(
    deep_attempts = n(),
    deep_comp_pct = mean(complete_pass) * 100,
    deep_epa = mean(epa)
  ) %>%
  arrange(desc(deep_epa))
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# Filter pass plays with air yards.
# .copy() makes `passes` an independent frame, so adding a column to it later
# does not trigger pandas' SettingWithCopyWarning.
passes = pbp[(pbp["air_yards"].notna()) & (pbp["play_type"] == "pass")].copy()

# Depth zones
def get_depth_zone(ay):
    """Return the depth-zone label for an air-yards value.

    Zones are checked from shallowest to deepest; each entry pairs an
    exclusive upper bound with its label. A value that is not below any
    bound falls through to the deepest zone.
    """
    zone_bounds = (
        (0, "Behind LOS"),
        (5, "Short (0-4)"),
        (10, "Intermediate (5-9)"),
        (15, "Medium (10-14)"),
        (20, "Deep (15-19)"),
    )
    for upper_bound, label in zone_bounds:
        if ay < upper_bound:
            return label
    return "Bomb (20+)"

# Label every pass with its depth zone
passes["depth_zone"] = passes["air_yards"].map(get_depth_zone)


def _as_pct(flags):
    """Mean of a 0/1 column expressed as a percentage."""
    return flags.mean() * 100


# Attempts, completion %, mean EPA and touchdown % for each depth zone
zone_groups = passes.groupby("depth_zone")
depth_analysis = zone_groups.agg(
    attempts=("air_yards", "count"),
    comp_pct=("complete_pass", _as_pct),
    avg_epa=("epa", "mean"),
    td_rate=("pass_touchdown", _as_pct),
).reset_index()

print("Passing Efficiency by Depth:")
print(depth_analysis)
Packages: nflfastR tidyverse nfl_data_py pandas
Pressure and Sack Rate
Analyze quarterback sack rates and passing efficiency, using sacks as a proxy for pressure.
Advanced
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Both analyses below work on plays whose play_type is "pass"
pass_plays <- pbp %>%
  filter(play_type == "pass")

# Per-QB sack profile: sack count, sack rate, and mean EPA on the plays that
# did not end in a sack. Only passers with 200+ plays are kept, and the
# lowest sack rates are listed first.
qb_pressure <- pass_plays %>%
  filter(!is.na(passer_player_id)) %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarise(
    dropbacks = n(),
    sacks = sum(sack),
    sack_rate = 100 * mean(sack),
    epa_no_sack = mean(epa[sack == 0], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(dropbacks >= 200) %>%
  arrange(sack_rate)

print(qb_pressure)

# League-wide EPA on sacks vs. non-sacks (sack used as a stand-in for pressure)
pass_plays %>%
  filter(!is.na(epa)) %>%
  group_by(sack = as.logical(sack)) %>%
  summarise(
    plays = n(),
    avg_epa = mean(epa)
  )
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# Pass plays that have an identified passer
has_passer = pbp["passer_player_id"].notna()
is_pass_play = pbp["play_type"] == "pass"
passes = pbp[has_passer & is_pass_play]

# Per-QB play count, sack total and sack percentage
by_passer = passes.groupby(["passer_player_id", "passer_player_name"])
qb_pressure = by_passer.agg(
    dropbacks=("sack", "count"),
    sacks=("sack", "sum"),
    sack_rate=("sack", lambda x: x.mean() * 100),
).reset_index()

# Keep passers with 200+ plays; lowest sack rate first
qualified = qb_pressure["dropbacks"] >= 200
qb_pressure = qb_pressure[qualified].sort_values("sack_rate")

print("QB Sack Rates (lowest is best):")
print(qb_pressure)
Packages: nflfastR tidyverse nfl_data_py pandas
Pass Location Heatmaps
Visualize pass distribution by field location.
Advanced
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Pass plays with a known air-yards value and a charted location.
# x_loc encodes the location as -1 / 0 / 1 (left / middle / right).
pass_locations <- pbp %>%
  filter(
    play_type == "pass",
    !is.na(air_yards),
    !is.na(pass_location)
  ) %>%
  mutate(
    x_loc = case_when(
      pass_location == "left" ~ -1,
      pass_location == "middle" ~ 0,
      pass_location == "right" ~ 1
    )
  )

# Team passing tendencies: attempts and mean EPA per location, plus each
# location's share of the team's passes
team_tendencies <- pass_locations %>%
  group_by(posteam, pass_location) %>%
  summarize(
    attempts = n(),
    epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(posteam) %>%
  mutate(pct = attempts / sum(attempts) * 100) %>%
  # Drop the grouping so later verbs on team_tendencies are not silently
  # evaluated per team
  ungroup()

# Visualize (sample for one team)
team_tendencies %>%
  filter(posteam == "KC") %>%
  ggplot(aes(x = pass_location, y = pct, fill = epa)) +
  geom_col() +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0) +
  labs(title = "KC Pass Location Tendencies", y = "% of Passes") +
  theme_minimal()
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# Pass location analysis: pass plays with known air yards and a charted location
passes = pbp[
    (pbp["play_type"] == "pass") &
    (pbp["air_yards"].notna()) &
    (pbp["pass_location"].notna())
]

# Team passing tendencies
team_tendencies = (passes.groupby(["posteam", "pass_location"])
    .agg(
        # "size" counts every row in the group. "count" would skip plays whose
        # EPA is missing and so understate attempts relative to the R
        # version's n().
        attempts=("epa", "size"),
        epa=("epa", "mean")
    )
    .reset_index())

# Calculate each location's share of its team's passes
team_totals = team_tendencies.groupby("posteam")["attempts"].transform("sum")
team_tendencies["pct"] = team_tendencies["attempts"] / team_totals * 100

print("Team Pass Location Tendencies:")
print(team_tendencies[team_tendencies["posteam"] == "KC"])
Packages: nflfastR tidyverse ggplot2 nfl_data_py pandas matplotlib
Dropback Success Rate
Calculate QB success rate on all dropback plays.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Dropback success rate (includes sacks and scrambles).
#
# Dropbacks are attributed through nflfastR's `passer_id` / `passer` columns
# rather than `passer_player_id` / `passer_player_name`. On a scramble the QB
# is recorded as the rusher and `passer_player_id` is NA, so filtering on it
# drops every scramble and pins scramble_rate at 0. The output keeps the
# original column names.
qb_dropback <- pbp %>%
  filter(qb_dropback == 1, !is.na(passer_id)) %>%
  group_by(passer_player_id = passer_id, passer_player_name = passer) %>%
  summarize(
    dropbacks = n(),
    success_rate = mean(success, na.rm = TRUE) * 100,
    epa_per_dropback = mean(epa, na.rm = TRUE),
    sack_rate = mean(sack) * 100,
    scramble_rate = mean(qb_scramble, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(dropbacks >= 200) %>%
  arrange(desc(success_rate))

print(qb_dropback)
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# Dropback success rate (includes sacks and scrambles).
# Dropbacks are attributed through the nflfastR `passer_id` / `passer`
# columns: on a scramble the QB is recorded as the rusher and
# `passer_player_id` is missing, so filtering on it would drop every scramble.
dropbacks = pbp[(pbp["passer_id"].notna()) & (pbp["qb_dropback"] == 1)]

qb_dropback = (dropbacks.groupby(["passer_id", "passer"])
    .agg(
        # "size" counts all dropbacks, including any with a missing `success`
        dropbacks=("success", "size"),
        success_rate=("success", lambda x: x.mean() * 100),
        epa_per_dropback=("epa", "mean"),
        sack_rate=("sack", lambda x: x.mean() * 100),
        scramble_rate=("qb_scramble", lambda x: x.mean() * 100)
    )
    .reset_index()
    # Keep the output column names used by the other examples
    .rename(columns={"passer_id": "passer_player_id",
                     "passer": "passer_player_name"}))

# Keep QBs with at least 200 dropbacks, highest success rate first
qb_dropback = qb_dropback[qb_dropback["dropbacks"] >= 200].sort_values(
    "success_rate", ascending=False)

print("QB Dropback Success Rate:")
print(qb_dropback)
Packages: nflfastR tidyverse nfl_data_py pandas
Play Action Effectiveness
Compare play action vs standard passing efficiency.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# NOTE(review): this example needs a `play_action` column in `pbp`. It is not
# among the columns I know nflfastR play-by-play to provide -- confirm it
# exists, or join it from a charting source, before relying on this example.

# Play action analysis.
# The flag is coerced to logical with missing values treated as FALSE, so it
# can be used for indexing (`epa[play_action]`) whether the source column is
# logical or 0/1 numeric.
play_action <- pbp %>%
  filter(play_type == "pass", !is.na(epa)) %>%
  mutate(play_action = coalesce(as.logical(play_action), FALSE))

# Overall comparison
play_action %>%
  group_by(play_action) %>%
  summarize(
    attempts = n(),
    avg_epa = mean(epa),
    success_rate = mean(success) * 100,
    avg_air_yards = mean(air_yards, na.rm = TRUE)
  )

# Team play action usage and effectiveness
team_pa <- play_action %>%
  group_by(posteam) %>%
  summarize(
    total_passes = n(),
    pa_rate = mean(play_action) * 100,
    pa_epa = mean(epa[play_action]),
    no_pa_epa = mean(epa[!play_action]),
    # Reuse the two means computed above instead of recomputing them
    pa_advantage = pa_epa - no_pa_epa
  ) %>%
  arrange(desc(pa_advantage))

print(team_pa)
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# NOTE(review): this example needs a `play_action` column in `pbp`. It is not
# among the columns I know nflverse play-by-play to provide -- confirm it
# exists, or join it from a charting source, before relying on this example.

# Play action analysis
passes = pbp[(pbp["play_type"] == "pass") & (pbp["epa"].notna())].copy()
# Missing flags count as "no play action". The cast to bool makes the column a
# valid boolean mask below even if the source column is 0/1 or object-typed.
passes["play_action"] = passes["play_action"].fillna(False).astype(bool)

# Overall comparison
pa_comparison = (passes.groupby("play_action")
    .agg(
        attempts=("epa", "count"),
        avg_epa=("epa", "mean"),
        success_rate=("success", lambda x: x.mean() * 100)
    )
    .reset_index())

print("Play Action vs Standard Passing:")
print(pa_comparison)

# Team play action effectiveness.
# Only the two columns the lambda reads are selected, so apply() never
# operates on the grouping column (deprecated in recent pandas versions).
team_pa = (passes.groupby("posteam")[["play_action", "epa"]]
    .apply(lambda x: pd.Series({
        "pa_rate": x["play_action"].mean() * 100,
        "pa_epa": x.loc[x["play_action"], "epa"].mean(),
        "no_pa_epa": x.loc[~x["play_action"], "epa"].mean()
    }))
    .reset_index())

team_pa["pa_advantage"] = team_pa["pa_epa"] - team_pa["no_pa_epa"]
print("\nTeam Play Action Effectiveness:")
print(team_pa.sort_values("pa_advantage", ascending=False))
Packages: nflfastR tidyverse nfl_data_py pandas
QB Scramble Analysis
Analyze quarterback scramble efficiency and tendencies.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# QB scramble analysis.
#
# Dropbacks are attributed through nflfastR's `passer_id` / `passer` columns.
# On a scramble the QB is recorded as the rusher and `passer_player_id` is NA,
# so filtering on `passer_player_id` removes every scramble and the
# `scrambles >= 10` filter then empties the table. The output keeps the
# original column names.
qb_scrambles <- pbp %>%
  filter(qb_dropback == 1, !is.na(passer_id)) %>%
  group_by(passer_player_id = passer_id, passer_player_name = passer) %>%
  summarize(
    dropbacks = n(),
    scrambles = sum(qb_scramble, na.rm = TRUE),
    scramble_rate = mean(qb_scramble, na.rm = TRUE) * 100,
    scramble_yards = sum(yards_gained[qb_scramble == 1], na.rm = TRUE),
    scramble_epa = mean(epa[qb_scramble == 1], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(dropbacks >= 200, scrambles >= 10) %>%
  arrange(desc(scramble_rate))

print(qb_scrambles)

# Scramble success by situation
pbp %>%
  filter(qb_scramble == 1) %>%
  mutate(
    situation = case_when(
      down <= 2 & ydstogo <= 5 ~ "Short yardage",
      down == 3 ~ "Third down",
      down == 4 ~ "Fourth down",
      TRUE ~ "Normal"
    )
  ) %>%
  group_by(situation) %>%
  summarize(
    scrambles = n(),
    # na.rm keeps a single missing value from turning a whole row into NA
    avg_yards = mean(yards_gained, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE)
  )
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# QB scramble analysis.
# Dropbacks are attributed through the nflfastR `passer_id` / `passer`
# columns: on a scramble the QB is recorded as the rusher and
# `passer_player_id` is missing, so filtering on it would remove every
# scramble and leave the table empty after the `scrambles >= 10` filter.
qb_plays = pbp[(pbp["passer_id"].notna()) & (pbp["qb_dropback"] == 1)]

qb_scrambles = (qb_plays.groupby(["passer_id", "passer"])
    .agg(
        dropbacks=("qb_dropback", "sum"),
        scrambles=("qb_scramble", "sum"),
        scramble_rate=("qb_scramble", lambda x: x.mean() * 100)
    )
    .reset_index()
    # Keep the output column names used by the other examples
    .rename(columns={"passer_id": "passer_player_id",
                     "passer": "passer_player_name"}))

# Keep QBs with 200+ dropbacks and 10+ scrambles, most frequent scramblers first
qb_scrambles = qb_scrambles[
    (qb_scrambles["dropbacks"] >= 200) &
    (qb_scrambles["scrambles"] >= 10)
].sort_values("scramble_rate", ascending=False)

print("QB Scramble Rates:")
print(qb_scrambles)
Packages: nflfastR tidyverse nfl_data_py pandas
Receiver Target Analysis
Analyze how teams distribute targets among receivers.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Pass plays with an identified target
targeted_passes <- pbp %>%
  filter(!is.na(receiver_player_id), play_type == "pass")

# Per-receiver (and team) production, most-targeted first, followed by each
# receiver's share of the targets summed over his team's rows
receiver_targets <- targeted_passes %>%
  group_by(receiver_player_id, receiver_player_name, posteam) %>%
  summarise(
    targets = n(),
    receptions = sum(complete_pass),
    yards = sum(yards_gained, na.rm = TRUE),
    tds = sum(pass_touchdown),
    epa = sum(epa, na.rm = TRUE),
    avg_depth = mean(air_yards, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(targets)) %>%
  group_by(posteam) %>%
  mutate(
    team_targets = sum(targets),
    target_share = targets / team_targets * 100
  ) %>%
  ungroup()

print(head(receiver_targets, 30))
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# Receiver target analysis: pass plays with an identified target
passes = pbp[(pbp["receiver_player_id"].notna()) & (pbp["play_type"] == "pass")]

receiver_targets = (passes.groupby(["receiver_player_id", "receiver_player_name", "posteam"])
    .agg(
        # "size" counts every targeted play. "count" would skip plays whose
        # EPA is missing and so understate targets relative to the R
        # version's n().
        targets=("epa", "size"),
        receptions=("complete_pass", "sum"),
        yards=("yards_gained", "sum"),
        tds=("pass_touchdown", "sum"),
        epa=("epa", "sum"),
        avg_depth=("air_yards", "mean")
    )
    .reset_index()
    .sort_values("targets", ascending=False))

# Add target share: each receiver's targets as a percentage of his team's total
team_targets = receiver_targets.groupby("posteam")["targets"].transform("sum")
receiver_targets["target_share"] = receiver_targets["targets"] / team_targets * 100

print("Top Receivers by Targets:")
print(receiver_targets.head(30))
Packages: nflfastR tidyverse nfl_data_py pandas
Yards After Catch Analysis
Analyze receiver YAC ability and team YAC tendencies.
Intermediate
library(nflfastR)
library(tidyverse)

# Load 2023 play-by-play data
pbp <- load_pbp(2023)

# Receiver YAC analysis.
#
# The summary runs over every target, not just completions, so that
# yac_per_target really divides by targets. Computed on completions only, it
# would equal avg_yac. Receptions and YAC totals still count only completed
# passes with a recorded yards_after_catch.
receiver_yac <- pbp %>%
  filter(play_type == "pass", !is.na(receiver_player_id)) %>%
  group_by(receiver_player_id, receiver_player_name) %>%
  summarize(
    targets = n(),
    receptions = sum(complete_pass == 1 & !is.na(yards_after_catch)),
    total_yac = sum(yards_after_catch[complete_pass == 1], na.rm = TRUE),
    avg_yac = total_yac / receptions,
    yac_per_target = total_yac / targets,
    .groups = "drop"
  ) %>%
  filter(receptions >= 40) %>%
  arrange(desc(avg_yac))

print(receiver_yac)

# Team YAC vs Air Yards balance (completions only)
team_yac <- pbp %>%
  filter(complete_pass == 1, !is.na(yards_after_catch)) %>%
  group_by(posteam) %>%
  summarize(
    completions = n(),
    avg_air_yards = mean(air_yards, na.rm = TRUE),
    avg_yac = mean(yards_after_catch),
    # Share of the average completion's yardage gained after the catch
    yac_pct = avg_yac / (avg_air_yards + avg_yac) * 100
  ) %>%
  arrange(desc(avg_yac))

print(team_yac)
import nfl_data_py as nfl
import pandas as pd

# Load 2023 play-by-play data
pbp = nfl.import_pbp_data([2023])

# Completed passes that have a recorded yards-after-catch value
was_caught = pbp["complete_pass"] == 1
has_yac = pbp["yards_after_catch"].notna()
completions = pbp[was_caught & has_yac]

# Per-receiver reception count, total YAC and average YAC
by_receiver = completions.groupby(["receiver_player_id", "receiver_player_name"])
receiver_yac = by_receiver.agg(
    receptions=("yards_after_catch", "count"),
    total_yac=("yards_after_catch", "sum"),
    avg_yac=("yards_after_catch", "mean"),
).reset_index()

# Keep receivers with 40+ receptions; highest average YAC first
qualified = receiver_yac["receptions"] >= 40
receiver_yac = receiver_yac[qualified].sort_values("avg_yac", ascending=False)

print("Top Receivers by YAC:")
print(receiver_yac.head(20))

# Per-team average air yards and average YAC on completions
by_team = completions.groupby("posteam")
team_yac = by_team.agg(
    avg_air_yards=("air_yards", "mean"),
    avg_yac=("yards_after_catch", "mean"),
).reset_index()
team_yac = team_yac.sort_values("avg_yac", ascending=False)

print("\nTeam YAC Rankings:")
print(team_yac)
Packages: nflfastR tidyverse nfl_data_py pandas
Quick Package Reference
R Packages
  • nflfastR - Play-by-play data with EPA
  • nflplotR - NFL team logos & plotting
  • tidyverse - Data manipulation & visualization
  • ggplot2 - Advanced visualizations
Python Packages
  • nfl_data_py - NFL data (nflverse compatible)
  • pandas - Data manipulation
  • matplotlib - Visualizations
  • scikit-learn - Machine learning

Ready to Dive Deeper?

Learn the theory behind these techniques in our comprehensive tutorial series

Browse Tutorials