Code Examples

Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.

122 Examples
R & Python Support: All examples include both R and Python versions. Click the tabs to switch between languages. Use the copy button to copy code to clipboard.

Win Probability

Analyze win probability, WPA, and clutch performance metrics

Basic Win Probability Lookup
Access and understand win probability values in play-by-play data.
Beginner
library(nflfastR)
library(tidyverse)

# Load the 2023 play-by-play table
pbp <- load_pbp(2023)

# The play-by-play data already carries win probability.
# Columns of interest: wp, vegas_wp, home_wp, away_wp

# First 20 plays that have both a WP value and a play description
plays_with_wp <- filter(pbp, !is.na(wp) & !is.na(desc))
key_plays <- head(
  select(plays_with_wp, game_id, qtr, time, posteam, desc, wp, wpa),
  20
)

print(key_plays)

# Mean WP for each score differential.
# Only differentials within three touchdowns (21 points) that occur on at
# least 100 plays are reported.
scored_plays <- filter(pbp, !is.na(wp) & !is.na(score_differential))
wp_by_score <- scored_plays %>%
  group_by(score_differential) %>%
  summarize(avg_wp = mean(wp), plays = n()) %>%
  filter(abs(score_differential) <= 21 & plays >= 100)

print(wp_by_score)
import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table
pbp = nfl.import_pbp_data([2023])

# Win probability columns: wp, vegas_wp, home_wp, away_wp
# Show the first 20 plays that carry a WP value
has_wp = pbp["wp"].notna()
key_plays = pbp.loc[has_wp, ["game_id", "qtr", "time", "posteam", "wp", "wpa"]].head(20)
print("Sample Win Probability Data:")
print(key_plays)

# Mean WP for each score differential
has_score_diff = pbp["score_differential"].notna()
per_differential = pbp.loc[has_wp & has_score_diff].groupby("score_differential")
wp_by_score = per_differential.agg(
    avg_wp=("wp", "mean"),
    plays=("wp", "count"),
).reset_index()

# Keep differentials within 21 points that occur on at least 100 plays
enough_plays = wp_by_score["plays"] >= 100
within_three_scores = wp_by_score["score_differential"].abs() <= 21
wp_by_score = wp_by_score[enough_plays & within_three_scores]
print("\nAverage WP by Score Differential:")
print(wp_by_score)
Packages: nflfastR tidyverse nfl_data_py pandas
Win Probability Added (WPA) Leaders
Find players who contributed most to their team's win probability.
Intermediate
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Plays that have both a WPA value and an identified passer
passer_plays <- pbp %>%
  filter(!is.na(wpa) & !is.na(passer_player_id))

# Per-passer totals: play count, summed WPA, and counts of plays that
# raised or lowered win probability
passer_totals <- passer_plays %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarize(
    dropbacks = n(),
    total_wpa = sum(wpa),
    positive_plays = sum(wpa > 0),
    negative_plays = sum(wpa < 0),
    .groups = "drop"
  )

# Leaderboard: at least 200 plays, best total WPA first
qb_wpa <- passer_totals %>%
  filter(dropbacks >= 200) %>%
  arrange(desc(total_wpa))

print(qb_wpa)

# Ten plays with the largest absolute WPA of the season
pbp %>%
  filter(!is.na(wpa)) %>%
  arrange(desc(abs(wpa))) %>%
  select(game_id, posteam, qtr, desc, wpa) %>%
  head(10)
import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Plays that have both a WPA value and an identified passer
has_wpa = pbp["wpa"].notna()
qb_plays = pbp[has_wpa & pbp["passer_player_id"].notna()]

# Per-passer totals: play count, summed WPA, and counts of plays that
# raised or lowered win probability
per_passer = qb_plays.groupby(["passer_player_id", "passer_player_name"])
qb_wpa = per_passer.agg(
    dropbacks=("wpa", "count"),
    total_wpa=("wpa", "sum"),
    positive_plays=("wpa", lambda s: (s > 0).sum()),
    negative_plays=("wpa", lambda s: (s < 0).sum()),
).reset_index()

# Leaderboard: at least 200 plays, best total WPA first
qualified = qb_wpa[qb_wpa["dropbacks"] >= 200]
qb_wpa = qualified.sort_values("total_wpa", ascending=False)
print("QB WPA Leaders:")
print(qb_wpa)

# Ten plays with the largest absolute WPA
plays_with_wpa = pbp[has_wpa].copy()
plays_with_wpa["abs_wpa"] = plays_with_wpa["wpa"].abs()
top_ten = plays_with_wpa.nlargest(10, "abs_wpa")
biggest_plays = top_ten[["game_id", "posteam", "qtr", "desc", "wpa"]]
print("\nBiggest WPA Plays:")
print(biggest_plays)
Packages: nflfastR tidyverse nfl_data_py pandas
Clutch Performance Analysis
Identify players who perform best in high-leverage situations.
Advanced
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Clutch situation: 4th quarter with win probability between 25% and 75%
# (inclusive). The flag is computed once so both analyses below share the
# same definition. Plays with a missing wp are removed first; otherwise the
# flag would be NA for them and they would form a third group in the
# clutch vs non-clutch comparison.
epa_plays <- pbp %>%
  filter(!is.na(epa), !is.na(wp)) %>%
  mutate(is_clutch = qtr == 4 & wp >= 0.25 & wp <= 0.75)

clutch_plays <- epa_plays %>%
  filter(is_clutch)

# QB clutch performance: totals over clutch plays, minimum 30 plays,
# ranked by summed WPA
qb_clutch <- clutch_plays %>%
  filter(!is.na(passer_player_id)) %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarize(
    clutch_plays = n(),
    clutch_epa = sum(epa),
    clutch_wpa = sum(wpa, na.rm = TRUE),
    clutch_success = mean(success),
    .groups = "drop"
  ) %>%
  filter(clutch_plays >= 30) %>%
  arrange(desc(clutch_wpa))

print(qb_clutch)

# Compare clutch vs non-clutch EPA per play for every passer.
# Grouping includes the player id (matching the table above) so two passers
# who share a display name are not merged, and the situation is turned into
# a text label so the pivoted columns get readable names instead of
# `TRUE` / `FALSE`.
clutch_comparison <- epa_plays %>%
  filter(!is.na(passer_player_id)) %>%
  mutate(
    situation = if_else(is_clutch, "clutch_epa_per_play", "non_clutch_epa_per_play")
  ) %>%
  group_by(passer_player_id, passer_player_name, situation) %>%
  summarize(epa_per_play = mean(epa), .groups = "drop") %>%
  pivot_wider(names_from = situation, values_from = epa_per_play)

print(clutch_comparison)
import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Clutch situation: 4th-quarter plays with an EPA value and a win
# probability between 25% and 75% (inclusive)
has_epa = pbp["epa"].notna()
fourth_quarter = pbp["qtr"].eq(4)
wp_in_window = pbp["wp"].ge(0.25) & pbp["wp"].le(0.75)
clutch_plays = pbp[has_epa & fourth_quarter & wp_in_window]

# Per-passer totals over clutch plays
passer_clutch = clutch_plays[clutch_plays["passer_player_id"].notna()]
per_passer = passer_clutch.groupby(["passer_player_id", "passer_player_name"])
qb_clutch = per_passer.agg(
    clutch_plays=("epa", "count"),
    clutch_epa=("epa", "sum"),
    clutch_wpa=("wpa", "sum"),
    clutch_success=("success", "mean"),
).reset_index()

# Minimum 30 clutch plays, ranked by summed WPA
qualified = qb_clutch[qb_clutch["clutch_plays"] >= 30]
qb_clutch = qualified.sort_values("clutch_wpa", ascending=False)

print("QB Clutch Performance (4Q, WP 25-75%):")
print(qb_clutch)
Packages: nflfastR tidyverse nfl_data_py pandas
Game Swing Plays
Identify the biggest momentum-shifting plays in games.
Intermediate
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Every play that has a WPA value; shared starting point for the tables below
wpa_plays <- pbp %>%
  filter(!is.na(wpa))

# 50 plays with the largest absolute change in win probability
swing_plays <- wpa_plays %>%
  arrange(desc(abs(wpa))) %>%
  select(
    game_id, week, posteam, qtr, time, down, ydstogo,
    desc, wpa, wp
  ) %>%
  head(50)

print(swing_plays)

# 10 interceptions with the largest absolute WPA
ints <- wpa_plays %>%
  filter(interception == 1) %>%
  arrange(desc(abs(wpa))) %>%
  select(
    game_id, passer_player_name, interception_player_name,
    wpa, wp, desc
  ) %>%
  head(10)

print(ints)

# 10 touchdowns with the highest (signed) WPA
tds <- wpa_plays %>%
  filter(touchdown == 1) %>%
  arrange(desc(wpa)) %>%
  select(game_id, posteam, qtr, wpa, wp, desc) %>%
  head(10)

print(tds)
import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# 50 plays with the largest absolute change in win probability
swing_columns = ["game_id", "week", "posteam", "qtr", "time", "down",
                 "ydstogo", "desc", "wpa", "wp"]
has_wpa = pbp["wpa"].notna()
swing_plays = (pbp[has_wpa]
    .assign(abs_wpa=lambda df: df["wpa"].abs())
    .nlargest(50, "abs_wpa")
    [swing_columns])

print("Biggest Swing Plays:")
print(swing_plays.head(20))

# 10 interceptions with the largest absolute WPA
int_columns = ["game_id", "passer_player_name", "interception_player_name", "wpa", "wp"]
picked_off = pbp[(pbp["interception"] == 1) & has_wpa].copy()
picked_off["abs_wpa"] = picked_off["wpa"].abs()
ints = picked_off.nlargest(10, "abs_wpa")[int_columns]

print("\nBiggest Interceptions by WPA:")
print(ints)
Packages: nflfastR tidyverse nfl_data_py pandas
Fourth Quarter Comeback Analysis
Track teams that came back from deficits in the fourth quarter.
Advanced
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Find games with 4th quarter comebacks.
#
# For every game this takes the home team's win probability at the start and
# end of the 4th quarter and compares it with who actually won the game.
#  * start/end WP use the first/last non-missing home_wp in the quarter, so a
#    row without a win probability cannot turn the whole game into NA.
#  * The winner comes from the game-level final score columns (home_score /
#    away_score) rather than the running score on the last 4th-quarter row.
#    The running score is tied for any game that goes to overtime, which
#    would label every such game as an away win.
#  * Games that end level are dropped because they have no winner.
comebacks <- pbp %>%
  filter(qtr == 4) %>%
  group_by(game_id, home_team, away_team) %>%
  summarize(
    start_wp_home = first(home_wp[!is.na(home_wp)]),
    end_wp_home = last(home_wp[!is.na(home_wp)]),
    # Final scores are constant within a game, so any row will do
    home_score = first(home_score),
    away_score = first(away_score),
    .groups = "drop"
  ) %>%
  filter(home_score != away_score) %>%
  mutate(
    home_won = home_score > away_score,
    # Comeback: the eventual winner entered the quarter below 25% WP
    comeback = (start_wp_home < 0.25 & home_won) |
               (start_wp_home > 0.75 & !home_won),
    wp_swing = abs(end_wp_home - start_wp_home)
  ) %>%
  filter(comeback) %>%
  arrange(desc(wp_swing))

print(comebacks)

# Teams with most comebacks
comeback_counts <- comebacks %>%
  mutate(
    comeback_team = if_else(home_won, home_team, away_team)
  ) %>%
  count(comeback_team, sort = TRUE)

print(comeback_counts)
import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Filter to 4th quarter plays
q4 = pbp[pbp["qtr"] == 4]

# Start and end home win probability for each game, plus the final score.
# The winner is taken from the game-level final score columns (home_score /
# away_score) instead of the running score on the last 4th-quarter row: the
# running score is tied for any game that goes to overtime, which would
# label every such game as an away win.
game_wp = (q4.groupby(["game_id", "home_team", "away_team"])
    .agg(
        start_wp_home=("home_wp", "first"),
        end_wp_home=("home_wp", "last"),
        # Final scores are constant within a game, so any row will do
        home_score=("home_score", "first"),
        away_score=("away_score", "first"),
    )
    .reset_index())

# Games that end level have no winner and cannot be a comeback
game_wp = game_wp[game_wp["home_score"] != game_wp["away_score"]].copy()

game_wp["home_won"] = game_wp["home_score"] > game_wp["away_score"]
# Comeback: the eventual winner entered the quarter below 25% WP
game_wp["comeback"] = (
    ((game_wp["start_wp_home"] < 0.25) & game_wp["home_won"]) |
    ((game_wp["start_wp_home"] > 0.75) & ~game_wp["home_won"])
)
game_wp["wp_swing"] = (game_wp["end_wp_home"] - game_wp["start_wp_home"]).abs()

comebacks = game_wp[game_wp["comeback"]].sort_values("wp_swing", ascending=False)
print("4th Quarter Comebacks:")
print(comebacks)
Packages: nflfastR tidyverse nfl_data_py pandas
Win Probability Charts
Create win probability charts for individual games.
Intermediate
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Select a specific game: the week 1 game hosted by KC, keeping only plays
# that have a win probability
game <- pbp %>%
  filter(week == 1, home_team == "KC") %>%
  filter(!is.na(wp))

# Create WP chart data.
# game_time counts seconds elapsed (3600 = 60 minutes of regulation);
# wp is from the possession team's point of view, so it is flipped whenever
# the away team has the ball to get a home-team series.
wp_chart <- game %>%
  select(game_seconds_remaining, wp, posteam, home_team, away_team) %>%
  mutate(
    game_time = 3600 - game_seconds_remaining,
    home_wp = if_else(posteam == home_team, wp, 1 - wp)
  )

# Plot.
# `linewidth` replaces `size` for line geoms as of ggplot2 3.4.0; `size`
# still works there but emits a deprecation warning.
ggplot(wp_chart, aes(x = game_time, y = home_wp)) +
  geom_line(color = "#E31837", linewidth = 1) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "gray50") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_x_continuous(
    breaks = c(0, 900, 1800, 2700, 3600),
    labels = c("Q1", "Q2", "Q3", "Q4", "End")
  ) +
  labs(
    title = paste(unique(game$away_team), "@", unique(game$home_team)),
    x = "Game Time",
    y = "Home Win Probability"
  ) +
  theme_minimal()
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

pbp = nfl.import_pbp_data([2023])

# Select a specific game: the week 1 game hosted by KC, keeping only plays
# that have a win probability
game = pbp[(pbp["week"] == 1) & (pbp["home_team"] == "KC")]
game = game[game["wp"].notna()].copy()

# Seconds elapsed (3600 = 60 minutes of regulation)
game["game_time"] = 3600 - game["game_seconds_remaining"]

# wp is from the possession team's point of view: keep it when the home team
# has the ball and flip it otherwise. Series.where does this for the whole
# column at once instead of calling a Python lambda on every row.
home_has_ball = game["posteam"] == game["home_team"]
game["home_wp"] = game["wp"].where(home_has_ball, 1 - game["wp"])

# Create plot
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(game["game_time"], game["home_wp"], color="#E31837", linewidth=2)
ax.axhline(y=0.5, color="gray", linestyle="--", alpha=0.5)

# One tick at the start of each quarter plus the end of regulation
ax.set_xlim(0, 3600)
ax.set_ylim(0, 1)
ax.set_xticks([0, 900, 1800, 2700, 3600])
ax.set_xticklabels(["Q1", "Q2", "Q3", "Q4", "End"])
ax.set_ylabel("Home Win Probability")
ax.set_title(f"{game['away_team'].iloc[0]} @ {game['home_team'].iloc[0]}")

plt.tight_layout()
plt.savefig("wp_chart.png", dpi=300)
plt.show()
Packages: nflfastR tidyverse ggplot2 nfl_data_py pandas matplotlib
Expected Points vs Win Probability
Compare EPA and WPA to understand their relationship.
Advanced
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Plays that have both an EPA and a WPA value
plays <- filter(pbp, !is.na(epa) & !is.na(wpa))

# Overall correlation between EPA and WPA
overall_cor <- with(plays, cor(epa, wpa))
cat("Correlation EPA vs WPA:", overall_cor, "\n")

# Label each play by game situation: everything in the first half is one
# bucket; second-half plays are "Blowout" when WP is outside 25-75% and
# "Competitive" otherwise
labelled_plays <- plays %>%
  mutate(
    situation = case_when(
      qtr <= 2 ~ "First Half",
      wp > 0.75 | wp < 0.25 ~ "Blowout",
      TRUE ~ "Competitive"
    )
  )

# Play count, mean EPA, mean WPA and EPA/WPA correlation per situation
situation_comparison <- labelled_plays %>%
  group_by(situation) %>%
  summarize(
    plays = n(),
    avg_epa = mean(epa),
    avg_wpa = mean(wpa),
    epa_wpa_cor = cor(epa, wpa)
  )

print(situation_comparison)

# Cross-tabulate EPA quartile against WPA quartile.
# High EPA but low WPA (garbage time) vs low EPA high WPA (clutch)
quartiled <- mutate(
  plays,
  epa_quartile = ntile(epa, 4),
  wpa_quartile = ntile(wpa, 4)
)
quartiled %>%
  count(epa_quartile, wpa_quartile) %>%
  pivot_wider(names_from = wpa_quartile, values_from = n)
import nfl_data_py as nfl
import pandas as pd
import numpy as np

pbp = nfl.import_pbp_data([2023])

# Filter plays with both EPA and WPA.
# .copy() makes this an independent frame, so adding the "situation" column
# below does not trigger pandas' SettingWithCopyWarning.
plays = pbp[(pbp["epa"].notna()) & (pbp["wpa"].notna())].copy()

# Overall correlation
correlation = plays["epa"].corr(plays["wpa"])
print(f"EPA vs WPA Correlation: {correlation:.3f}")

# By game situation. np.select takes the first matching condition, so the
# order matters: everything in the first half is "First Half"; second-half
# plays are "Blowout" when WP is outside 25-75% and "Competitive" otherwise
# (including plays with a missing wp, where both comparisons are False).
is_first_half = plays["qtr"] <= 2
is_lopsided = (plays["wp"] > 0.75) | (plays["wp"] < 0.25)
plays["situation"] = np.select(
    [is_first_half, is_lopsided],
    ["First Half", "Blowout"],
    default="Competitive",
)

by_situation = plays.groupby("situation")

situation_comparison = (by_situation
    .agg(
        plays=("epa", "count"),
        avg_epa=("epa", "mean"),
        avg_wpa=("wpa", "mean")
    )
    .reset_index())

# EPA/WPA correlation within each situation (matches the R version's
# epa_wpa_cor column)
cor_by_situation = by_situation[["epa", "wpa"]].apply(
    lambda g: g["epa"].corr(g["wpa"])
)
situation_comparison["epa_wpa_cor"] = (
    situation_comparison["situation"].map(cor_by_situation)
)

print("\nEPA vs WPA by Situation:")
print(situation_comparison)
Packages: nflfastR tidyverse nfl_data_py pandas numpy
Late-Game Decision Analysis
Analyze coaching decisions in crucial late-game situations using WP.
Advanced
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Fourth down decisions in close 4th quarter games (WP between 20% and 80%).
#
# The play-by-play data has no ready-made decision column, so the decision
# is derived from play_type: a run or pass is an attempt to convert ("go"),
# while punts and field goals keep their own label. Any other play type
# (e.g. kneel-downs, spikes, plays wiped out by penalty) is left as NA and
# dropped.
late_4th_downs <- pbp %>%
  filter(
    down == 4,
    qtr == 4,
    wp >= 0.20 & wp <= 0.80
  ) %>%
  mutate(
    fourth_down_decision = case_when(
      play_type %in% c("run", "pass") ~ "go",
      play_type == "punt" ~ "punt",
      play_type == "field_goal" ~ "field_goal",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(fourth_down_decision))

# Decision distribution
late_4th_downs %>%
  count(fourth_down_decision)

# WP impact by decision.
# success_rate is the share of plays flagged as a fourth-down conversion, so
# it is only informative for the "go" row.
decision_impact <- late_4th_downs %>%
  group_by(fourth_down_decision) %>%
  summarize(
    attempts = n(),
    avg_wpa = mean(wpa, na.rm = TRUE),
    success_rate = mean(fourth_down_converted, na.rm = TRUE),
    avg_wp_before = mean(wp)
  )

print(decision_impact)

# Aggressive vs conservative coaches: share of each decision per offense,
# most aggressive first
coach_decisions <- late_4th_downs %>%
  group_by(posteam) %>%
  summarize(
    fourth_downs = n(),
    go_for_it_rate = mean(fourth_down_decision == "go"),
    punt_rate = mean(fourth_down_decision == "punt"),
    fg_rate = mean(fourth_down_decision == "field_goal")
  ) %>%
  arrange(desc(go_for_it_rate))

print(coach_decisions)
import nfl_data_py as nfl
import pandas as pd

pbp = nfl.import_pbp_data([2023])

# Fourth down decisions in close 4th quarter games (WP between 20% and 80%)
late_4th = pbp[
    (pbp["down"] == 4) &
    (pbp["qtr"] == 4) &
    (pbp["wp"] >= 0.20) &
    (pbp["wp"] <= 0.80)
].copy()

# The play-by-play data has no ready-made decision column, so the decision
# is derived from play_type: a run or pass is an attempt to convert ("go"),
# while punts and field goals keep their own label. Any other play type maps
# to NaN and is dropped.
decision_by_play_type = {
    "run": "go",
    "pass": "go",
    "punt": "punt",
    "field_goal": "field_goal",
}
late_4th["fourth_down_decision"] = late_4th["play_type"].map(decision_by_play_type)
late_4th = late_4th[late_4th["fourth_down_decision"].notna()]

# Decision distribution
print("4th Down Decision Distribution:")
print(late_4th["fourth_down_decision"].value_counts())

# WP impact by decision
decision_impact = (late_4th.groupby("fourth_down_decision")
    .agg(
        attempts=("wpa", "count"),
        avg_wpa=("wpa", "mean"),
        avg_wp_before=("wp", "mean")
    )
    .reset_index())

print("\nDecision Impact:")
print(decision_impact)

# Team aggressiveness: fourth downs faced and how many were "go" decisions.
# Summing a boolean flag yields 0 for teams that never went for it, so no
# merge or fillna step is needed.
team_decisions = (late_4th
    .assign(is_go=late_4th["fourth_down_decision"] == "go")
    .groupby("posteam")
    .agg(
        fourth_downs=("is_go", "size"),
        go_for_it=("is_go", "sum")
    )
    .reset_index())
team_decisions["go_rate"] = team_decisions["go_for_it"] / team_decisions["fourth_downs"]

print("\nTeam Aggressiveness:")
print(team_decisions.sort_values("go_rate", ascending=False))
Packages: nflfastR tidyverse nfl_data_py pandas
Quick Package Reference
R Packages
  • nflfastR - Play-by-play data with EPA
  • nflplotR - NFL team logos & plotting
  • tidyverse - Data manipulation & visualization
  • ggplot2 - Advanced visualizations
Python Packages
  • nfl_data_py - NFL data (nflverse compatible)
  • pandas - Data manipulation
  • matplotlib - Visualizations
  • scikit-learn - Machine learning

Ready to Dive Deeper?

Learn the theory behind these techniques in our comprehensive tutorial series

Browse Tutorials