Code Examples

Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.

122 Examples
R & Python Support: All examples include both R and Python versions. Click the tabs to switch between languages. Use the copy button to copy code to clipboard.

Data Visualization

Create publication-quality charts and visualizations for NFL data

EPA Scatter Plot with Team Logos
Create an offense vs defense EPA plot with team logos.
Intermediate
library(nflfastR)
library(nflplotR)
library(tidyverse)

pbp <- load_pbp(2023)

# Calculate team EPA
team_epa <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
  group_by(posteam) %>%
  summarize(off_epa = mean(epa)) %>%
  left_join(
    pbp %>%
      filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
      group_by(defteam) %>%
      summarize(def_epa = mean(epa)),
    by = c("posteam" = "defteam")
  )

# Create plot with logos
ggplot(team_epa, aes(x = off_epa, y = def_epa)) +
  geom_mean_lines(aes(y0 = def_epa, x0 = off_epa)) +
  geom_nfl_logos(aes(team_abbr = posteam), width = 0.065) +
  labs(
    title = "NFL Team Efficiency (2023)",
    subtitle = "Offensive vs Defensive EPA/Play",
    x = "Offensive EPA/Play (higher is better)",
    y = "Defensive EPA/Play (lower is better)",
    caption = "Data: nflfastR"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(size = 12)
  )

ggsave("epa_scatter.png", width = 10, height = 8, dpi = 300)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

pbp = nfl.import_pbp_data([2023])

# Calculate team EPA
plays = pbp[(pbp["epa"].notna()) &
            (pbp["play_type"].isin(["pass", "run"]))]

off_epa = plays.groupby("posteam")["epa"].mean().reset_index()
off_epa.columns = ["team", "off_epa"]

def_epa = plays.groupby("defteam")["epa"].mean().reset_index()
def_epa.columns = ["team", "def_epa"]

team_epa = off_epa.merge(def_epa, on="team")

# Create scatter plot
fig, ax = plt.subplots(figsize=(12, 10))

ax.scatter(team_epa["off_epa"], team_epa["def_epa"], s=100, alpha=0.7)

# Add team labels
for idx, row in team_epa.iterrows():
    ax.annotate(row["team"], (row["off_epa"], row["def_epa"]),
                fontsize=9, ha="center", va="bottom")

# Add mean lines
ax.axhline(y=team_epa["def_epa"].mean(), color="gray",
           linestyle="--", alpha=0.5)
ax.axvline(x=team_epa["off_epa"].mean(), color="gray",
           linestyle="--", alpha=0.5)

ax.set_xlabel("Offensive EPA/Play (higher is better)", fontsize=12)
ax.set_ylabel("Defensive EPA/Play (lower is better)", fontsize=12)
ax.set_title("NFL Team Efficiency (2023)", fontsize=16, fontweight="bold")
ax.invert_yaxis()  # Lower def EPA is better

plt.tight_layout()
plt.savefig("epa_scatter.png", dpi=300)
plt.show()
Packages: nflfastR nflplotR ggplot2 tidyverse matplotlib pandas
Weekly Performance Line Chart
Track team EPA performance across the season with a line chart.
Beginner
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Get weekly EPA for specific teams
teams_of_interest <- c("KC", "SF", "BAL", "DET")

weekly_epa <- pbp %>%
  filter(!is.na(epa), posteam %in% teams_of_interest) %>%
  group_by(posteam, week) %>%
  summarize(epa_per_play = mean(epa), .groups = "drop")

# Create line chart
ggplot(weekly_epa, aes(x = week, y = epa_per_play,
                        color = posteam, group = posteam)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  scale_color_manual(values = c("KC" = "#E31837", "SF" = "#AA0000",
                                "BAL" = "#241773", "DET" = "#0076B6")) +
  labs(
    title = "Weekly EPA/Play Trends (2023)",
    x = "Week",
    y = "EPA/Play",
    color = "Team"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

ggsave("weekly_epa.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

pbp = nfl.import_pbp_data([2023])

# Get weekly EPA for specific teams
teams = ["KC", "SF", "BAL", "DET"]
team_colors = {"KC": "#E31837", "SF": "#AA0000",
               "BAL": "#241773", "DET": "#0076B6"}

plays = pbp[(pbp["epa"].notna()) & (pbp["posteam"].isin(teams))]
weekly_epa = plays.groupby(["posteam", "week"])["epa"].mean().reset_index()

# Create line chart
fig, ax = plt.subplots(figsize=(12, 6))

for team in teams:
    team_data = weekly_epa[weekly_epa["posteam"] == team]
    ax.plot(team_data["week"], team_data["epa"],
            color=team_colors[team], label=team,
            linewidth=2, marker="o", markersize=5)

ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
ax.set_xlabel("Week", fontsize=12)
ax.set_ylabel("EPA/Play", fontsize=12)
ax.set_title("Weekly EPA/Play Trends (2023)", fontsize=14, fontweight="bold")
ax.legend(loc="lower right")

plt.tight_layout()
plt.savefig("weekly_epa.png", dpi=300)
plt.show()
Packages: nflfastR ggplot2 tidyverse matplotlib pandas
Pass Chart Visualization
Create a field visualization showing pass locations and outcomes.
Advanced
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Get pass data for a specific QB
qb_passes <- pbp %>%
  filter(
    passer_player_name == "P.Mahomes",
    !is.na(air_yards),
    !is.na(pass_location)
  ) %>%
  mutate(
    x = case_when(
      pass_location == "left" ~ -10,
      pass_location == "middle" ~ 0,
      pass_location == "right" ~ 10
    ),
    y = air_yards,
    result = case_when(
      interception == 1 ~ "INT",
      complete_pass == 1 ~ "Complete",
      TRUE ~ "Incomplete"
    )
  )

# Create pass chart
ggplot(qb_passes, aes(x = x, y = y, color = result)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_hline(yintercept = 0, color = "darkgreen", size = 2) +
  scale_color_manual(values = c("Complete" = "#00AA00",
                                "Incomplete" = "#AA0000",
                                "INT" = "#000000")) +
  coord_cartesian(xlim = c(-20, 20), ylim = c(-5, 40)) +
  labs(
    title = "Patrick Mahomes Pass Chart (2023)",
    x = "Field Width",
    y = "Air Yards",
    color = "Result"
  ) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "right"
  )

ggsave("pass_chart.png", width = 8, height = 10)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

pbp = nfl.import_pbp_data([2023])

# Get pass data for a specific QB
qb_passes = pbp[
    (pbp["passer_player_name"] == "P.Mahomes") &
    (pbp["air_yards"].notna()) &
    (pbp["pass_location"].notna())
].copy()

# Map pass location to x coordinate
location_map = {"left": -10, "middle": 0, "right": 10}
qb_passes["x"] = qb_passes["pass_location"].map(location_map)
qb_passes["y"] = qb_passes["air_yards"]

# Determine result
def get_result(row):
    if row["interception"] == 1: return "INT"
    elif row["complete_pass"] == 1: return "Complete"
    else: return "Incomplete"

qb_passes["result"] = qb_passes.apply(get_result, axis=1)

# Create pass chart
fig, ax = plt.subplots(figsize=(8, 10))

colors = {"Complete": "#00AA00", "Incomplete": "#AA0000", "INT": "#000000"}
for result, color in colors.items():
    data = qb_passes[qb_passes["result"] == result]
    ax.scatter(data["x"], data["y"], c=color, label=result,
               alpha=0.6, s=30)

ax.axhline(y=0, color="darkgreen", linewidth=3)
ax.set_xlim(-20, 20)
ax.set_ylim(-5, 40)
ax.set_xlabel("Field Width")
ax.set_ylabel("Air Yards")
ax.set_title("Patrick Mahomes Pass Chart (2023)", fontweight="bold")
ax.legend()

plt.tight_layout()
plt.savefig("pass_chart.png", dpi=300)
plt.show()
Packages: nflfastR ggplot2 tidyverse matplotlib pandas
Heatmap Visualization
Create heatmaps for play type efficiency by down and distance.
Intermediate
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Create EPA heatmap by down and distance bucket
heatmap_data <- pbp %>%
  filter(!is.na(epa), down <= 4, !is.na(ydstogo)) %>%
  mutate(
    distance_bucket = case_when(
      ydstogo <= 3 ~ "1-3",
      ydstogo <= 6 ~ "4-6",
      ydstogo <= 10 ~ "7-10",
      TRUE ~ "10+"
    )
  ) %>%
  group_by(down, distance_bucket) %>%
  summarize(
    avg_epa = mean(epa),
    plays = n(),
    .groups = "drop"
  )

# Create heatmap
ggplot(heatmap_data, aes(x = distance_bucket, y = factor(down), fill = avg_epa)) +
  geom_tile(color = "white", size = 0.5) +
  geom_text(aes(label = round(avg_epa, 3)), color = "white", size = 4) +
  scale_fill_gradient2(low = "#BB0000", mid = "gray90", high = "#006400",
                        midpoint = 0, name = "EPA/Play") +
  labs(
    title = "EPA by Down and Distance (2023)",
    x = "Yards to Go",
    y = "Down"
  ) +
  theme_minimal() +
  theme(panel.grid = element_blank())

ggsave("epa_heatmap.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

pbp = nfl.import_pbp_data([2023])

# Filter valid plays
plays = pbp[(pbp["epa"].notna()) & (pbp["down"] <= 4) & (pbp["ydstogo"].notna())]

# Create distance buckets
def distance_bucket(yd):
    if yd <= 3: return "1-3"
    elif yd <= 6: return "4-6"
    elif yd <= 10: return "7-10"
    else: return "10+"

plays["distance_bucket"] = plays["ydstogo"].apply(distance_bucket)

# Calculate EPA by down and distance
heatmap_data = plays.groupby(["down", "distance_bucket"])["epa"].mean().reset_index()
pivot_data = heatmap_data.pivot(index="down", columns="distance_bucket", values="epa")

# Reorder columns
col_order = ["1-3", "4-6", "7-10", "10+"]
pivot_data = pivot_data[[c for c in col_order if c in pivot_data.columns]]

# Create heatmap
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(pivot_data, annot=True, fmt=".3f", cmap="RdYlGn", center=0,
            linewidths=0.5, ax=ax, cbar_kws={"label": "EPA/Play"})

ax.set_xlabel("Yards to Go")
ax.set_ylabel("Down")
ax.set_title("EPA by Down and Distance (2023)", fontweight="bold")

plt.tight_layout()
plt.savefig("epa_heatmap.png", dpi=300)
plt.show()
Packages: nflfastR ggplot2 tidyverse seaborn matplotlib pandas
Radar/Spider Chart
Create radar charts comparing QB performance across metrics.
Advanced
library(nflfastR)
library(tidyverse)
library(fmsb)  # For radar charts

pbp <- load_pbp(2023)

# Calculate QB metrics
qb_metrics <- pbp %>%
  filter(!is.na(passer_player_id), play_type == "pass") %>%
  group_by(passer_player_name) %>%
  summarize(
    attempts = n(),
    epa_play = mean(epa, na.rm = TRUE),
    cpoe = mean(cpoe, na.rm = TRUE),
    avg_air = mean(air_yards, na.rm = TRUE),
    deep_pct = mean(air_yards >= 20, na.rm = TRUE) * 100,
    sack_rate = mean(sack == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 300)

# Normalize to 0-100 scale for radar
normalize <- function(x) (x - min(x)) / (max(x) - min(x)) * 100

qb_radar <- qb_metrics %>%
  mutate(across(c(epa_play, cpoe, avg_air, deep_pct), normalize)) %>%
  mutate(sack_rate = 100 - normalize(sack_rate))  # Invert - lower is better

# Get top QBs for comparison
top_qbs <- c("P.Mahomes", "J.Allen", "L.Jackson")
radar_data <- qb_radar %>%
  filter(passer_player_name %in% top_qbs) %>%
  select(passer_player_name, epa_play, cpoe, avg_air, deep_pct, sack_rate)

print(radar_data)
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import pi

pbp = nfl.import_pbp_data([2023])

# Calculate QB metrics
passes = pbp[(pbp["passer_player_id"].notna()) & (pbp["play_type"] == "pass")]

qb_metrics = (passes.groupby("passer_player_name")
    .agg(
        attempts=("play_id", "count"),
        epa_play=("epa", "mean"),
        cpoe=("cpoe", "mean"),
        avg_air=("air_yards", "mean"),
        deep_pct=("air_yards", lambda x: (x >= 20).mean() * 100),
        sack_rate=("sack", "mean")
    )
    .reset_index())

qb_metrics = qb_metrics[qb_metrics["attempts"] >= 300]

# Normalize to 0-100
def normalize(col):
    return (col - col.min()) / (col.max() - col.min()) * 100

for col in ["epa_play", "cpoe", "avg_air", "deep_pct"]:
    qb_metrics[col + "_norm"] = normalize(qb_metrics[col])

# Invert sack rate (lower is better)
qb_metrics["sack_norm"] = 100 - normalize(qb_metrics["sack_rate"] * 100)

# Create radar chart for top QBs
categories = ["EPA/Play", "CPOE", "Air Yards", "Deep%", "Sack Avoidance"]
qbs = ["P.Mahomes", "J.Allen", "L.Jackson"]

fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

for qb in qbs:
    data = qb_metrics[qb_metrics["passer_player_name"] == qb]
    if not data.empty:
        values = [data["epa_play_norm"].iloc[0], data["cpoe_norm"].iloc[0],
                  data["avg_air_norm"].iloc[0], data["deep_pct_norm"].iloc[0],
                  data["sack_norm"].iloc[0]]
        values += values[:1]
        ax.plot(angles, values, linewidth=2, label=qb)
        ax.fill(angles, values, alpha=0.1)

ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories)
ax.set_title("QB Comparison Radar Chart", fontweight="bold")
ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.0))
plt.tight_layout()
plt.savefig("qb_radar.png", dpi=300)
plt.show()
Packages: nflfastR tidyverse fmsb matplotlib pandas numpy
Box Plot Comparisons
Compare distributions using box plots for player or team analysis.
Beginner
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# EPA distribution by play type
play_epa <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run"))

# Create box plot
ggplot(play_epa, aes(x = play_type, y = epa, fill = play_type)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  coord_cartesian(ylim = c(-3, 3)) +
  scale_fill_manual(values = c("pass" = "#1E90FF", "run" = "#228B22")) +
  labs(
    title = "EPA Distribution by Play Type (2023)",
    x = "Play Type",
    y = "EPA",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

ggsave("epa_boxplot.png", width = 8, height = 6)

# Box plot by down
ggplot(play_epa %>% filter(!is.na(down), down <= 4),
       aes(x = factor(down), y = epa, fill = factor(down))) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  coord_cartesian(ylim = c(-2, 2)) +
  labs(title = "EPA by Down", x = "Down", y = "EPA") +
  theme_minimal() +
  theme(legend.position = "none")

ggsave("epa_by_down.png", width = 8, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

pbp = nfl.import_pbp_data([2023])

# Filter plays with EPA
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]

# Create box plot by play type
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Plot 1: By play type
sns.boxplot(data=plays, x="play_type", y="epa", ax=axes[0],
            palette={"pass": "#1E90FF", "run": "#228B22"},
            showfliers=False)
axes[0].set_ylim(-3, 3)
axes[0].set_xlabel("Play Type")
axes[0].set_ylabel("EPA")
axes[0].set_title("EPA Distribution by Play Type (2023)")

# Plot 2: By down
plays_by_down = plays[(plays["down"].notna()) & (plays["down"] <= 4)]
sns.boxplot(data=plays_by_down, x="down", y="epa", ax=axes[1],
            palette="viridis", showfliers=False)
axes[1].set_ylim(-2, 2)
axes[1].set_xlabel("Down")
axes[1].set_ylabel("EPA")
axes[1].set_title("EPA Distribution by Down (2023)")

plt.tight_layout()
plt.savefig("epa_boxplots.png", dpi=300)
plt.show()
Packages: nflfastR ggplot2 tidyverse seaborn matplotlib pandas
Interactive Plotly Dashboard
Create interactive visualizations with hover tooltips.
Advanced
library(nflfastR)
library(tidyverse)
library(plotly)

pbp <- load_pbp(2023)

# Calculate team stats
team_stats <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
  group_by(posteam) %>%
  summarize(
    off_epa = mean(epa),
    pass_rate = mean(play_type == "pass") * 100,
    plays = n(),
    .groups = "drop"
  ) %>%
  left_join(
    pbp %>%
      filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
      group_by(defteam) %>%
      summarize(def_epa = mean(epa)),
    by = c("posteam" = "defteam")
  )

# Create interactive scatter plot
p <- plot_ly(team_stats,
  x = ~off_epa,
  y = ~def_epa,
  text = ~paste("Team:", posteam,
                "<br>Off EPA:", round(off_epa, 3),
                "<br>Def EPA:", round(def_epa, 3),
                "<br>Pass Rate:", round(pass_rate, 1), "%"),
  hoverinfo = "text",
  type = "scatter",
  mode = "markers",
  marker = list(size = 12, opacity = 0.8)
) %>%
  layout(
    title = "NFL Team Efficiency (2023)",
    xaxis = list(title = "Offensive EPA/Play"),
    yaxis = list(title = "Defensive EPA/Play", autorange = "reversed")
  )

# Save or display
htmlwidgets::saveWidget(p, "team_efficiency.html")
import nfl_data_py as nfl
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

pbp = nfl.import_pbp_data([2023])

# Calculate team stats
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]

off_stats = (plays.groupby("posteam")
    .agg(
        off_epa=("epa", "mean"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        plays=("epa", "count")
    )
    .reset_index())

def_stats = (plays.groupby("defteam")
    .agg(def_epa=("epa", "mean"))
    .reset_index())

team_stats = off_stats.merge(def_stats, left_on="posteam", right_on="defteam")

# Create interactive scatter with Plotly
fig = px.scatter(
    team_stats,
    x="off_epa",
    y="def_epa",
    text="posteam",
    hover_data={"off_epa": ":.3f", "def_epa": ":.3f", "pass_rate": ":.1f"},
    title="NFL Team Efficiency (2023)"
)

fig.update_traces(textposition="top center", marker=dict(size=12))
fig.update_yaxes(autorange="reversed")  # Lower def EPA is better
fig.update_layout(
    xaxis_title="Offensive EPA/Play (higher is better)",
    yaxis_title="Defensive EPA/Play (lower is better)"
)

fig.write_html("team_efficiency.html")
fig.show()
Packages: nflfastR tidyverse plotly pandas
Histogram Distributions
Visualize distributions of key metrics with histograms.
Beginner
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# EPA histogram
epa_data <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run"))

ggplot(epa_data, aes(x = epa, fill = play_type)) +
  geom_histogram(bins = 50, alpha = 0.6, position = "identity") +
  scale_fill_manual(values = c("pass" = "#1E90FF", "run" = "#228B22")) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  coord_cartesian(xlim = c(-5, 5)) +
  labs(
    title = "Distribution of EPA by Play Type (2023)",
    x = "Expected Points Added (EPA)",
    y = "Count",
    fill = "Play Type"
  ) +
  theme_minimal()

ggsave("epa_histogram.png", width = 10, height = 6)

# Air yards histogram
air_data <- pbp %>%
  filter(!is.na(air_yards), play_type == "pass")

ggplot(air_data, aes(x = air_yards)) +
  geom_histogram(bins = 40, fill = "#1E90FF", alpha = 0.7) +
  geom_vline(aes(xintercept = mean(air_yards)), color = "red",
             linetype = "dashed", size = 1) +
  labs(
    title = "Distribution of Air Yards (2023)",
    x = "Air Yards",
    y = "Count",
    caption = "Red line = mean"
  ) +
  theme_minimal()

ggsave("air_yards_histogram.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

pbp = nfl.import_pbp_data([2023])

# EPA histogram by play type
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: EPA distribution
for play_type, color in [("pass", "#1E90FF"), ("run", "#228B22")]:
    data = plays[plays["play_type"] == play_type]["epa"]
    axes[0].hist(data, bins=50, alpha=0.6, color=color, label=play_type.capitalize())

axes[0].axvline(x=0, color="black", linestyle="--")
axes[0].set_xlim(-5, 5)
axes[0].set_xlabel("EPA")
axes[0].set_ylabel("Count")
axes[0].set_title("Distribution of EPA by Play Type (2023)")
axes[0].legend()

# Plot 2: Air yards distribution
air_data = pbp[(pbp["air_yards"].notna()) & (pbp["play_type"] == "pass")]
axes[1].hist(air_data["air_yards"], bins=40, color="#1E90FF", alpha=0.7)
mean_air = air_data["air_yards"].mean()
axes[1].axvline(x=mean_air, color="red", linestyle="--", label=f"Mean: {mean_air:.1f}")
axes[1].set_xlabel("Air Yards")
axes[1].set_ylabel("Count")
axes[1].set_title("Distribution of Air Yards (2023)")
axes[1].legend()

plt.tight_layout()
plt.savefig("histograms.png", dpi=300)
plt.show()
Packages: nflfastR ggplot2 tidyverse matplotlib pandas
Correlation Matrix
Visualize correlations between multiple metrics.
Intermediate
library(nflfastR)
library(tidyverse)
library(corrplot)

pbp <- load_pbp(2023)

# Calculate team metrics for correlation
team_metrics <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
  group_by(posteam) %>%
  summarize(
    off_epa = mean(epa),
    pass_rate = mean(play_type == "pass") * 100,
    pass_epa = mean(epa[play_type == "pass"]),
    run_epa = mean(epa[play_type == "run"]),
    success_rate = mean(success, na.rm = TRUE) * 100,
    explosive_rate = mean(epa > 2, na.rm = TRUE) * 100,
    .groups = "drop"
  )

# Create correlation matrix
cor_matrix <- team_metrics %>%
  select(-posteam) %>%
  cor(use = "complete.obs")

# Visualize
corrplot(cor_matrix,
         method = "color",
         type = "upper",
         addCoef.col = "black",
         number.cex = 0.7,
         tl.col = "black",
         title = "Team Offensive Metrics Correlation",
         mar = c(0, 0, 2, 0))

png("correlation_matrix.png", width = 800, height = 800)
corrplot(cor_matrix, method = "color", type = "upper",
         addCoef.col = "black", number.cex = 0.8)
dev.off()
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

pbp = nfl.import_pbp_data([2023])

# Calculate team metrics
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]

def calculate_metrics(group):
    return pd.Series({
        "off_epa": group["epa"].mean(),
        "pass_rate": (group["play_type"] == "pass").mean() * 100,
        "pass_epa": group[group["play_type"] == "pass"]["epa"].mean(),
        "run_epa": group[group["play_type"] == "run"]["epa"].mean(),
        "success_rate": group["success"].mean() * 100,
        "explosive_rate": (group["epa"] > 2).mean() * 100
    })

team_metrics = plays.groupby("posteam").apply(calculate_metrics).reset_index()

# Calculate correlation matrix
metrics_only = team_metrics.drop("posteam", axis=1)
cor_matrix = metrics_only.corr()

# Create heatmap
fig, ax = plt.subplots(figsize=(10, 8))
mask = np.triu(np.ones_like(cor_matrix, dtype=bool))

sns.heatmap(cor_matrix, mask=mask, annot=True, fmt=".2f",
            cmap="coolwarm", center=0, square=True,
            linewidths=0.5, ax=ax)

ax.set_title("Team Offensive Metrics Correlation", fontweight="bold")
plt.tight_layout()
plt.savefig("correlation_matrix.png", dpi=300)
plt.show()
Packages: nflfastR tidyverse corrplot seaborn matplotlib pandas
Time Series with Annotations
Create time series plots with key events annotated.
Intermediate
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Calculate rolling EPA for a team
team_rolling <- pbp %>%
  filter(posteam == "SF", !is.na(epa), play_type %in% c("pass", "run")) %>%
  arrange(game_id, play_id) %>%
  mutate(
    play_num = row_number(),
    rolling_epa = zoo::rollmean(epa, k = 50, fill = NA, align = "right")
  )

# Define key game events
key_games <- team_rolling %>%
  group_by(game_id, week) %>%
  summarize(
    play_num = max(play_num),
    rolling_epa = last(na.omit(rolling_epa)),
    .groups = "drop"
  ) %>%
  filter(week %in% c(1, 8, 18))  # Annotate select weeks

# Create time series with annotations
ggplot(team_rolling, aes(x = play_num, y = rolling_epa)) +
  geom_line(color = "#AA0000", size = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(data = key_games, color = "black", size = 3) +
  geom_text(data = key_games, aes(label = paste("Week", week)),
            vjust = -1.5, size = 3) +
  labs(
    title = "San Francisco 49ers - Rolling EPA/Play (2023)",
    subtitle = "50-play rolling average",
    x = "Play Number",
    y = "Rolling EPA/Play"
  ) +
  theme_minimal()

ggsave("rolling_epa.png", width = 12, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt

pbp = nfl.import_pbp_data([2023])

# Calculate rolling EPA for SF
sf_plays = pbp[(pbp["posteam"] == "SF") &
               (pbp["epa"].notna()) &
               (pbp["play_type"].isin(["pass", "run"]))].copy()

sf_plays = sf_plays.sort_values(["game_id", "play_id"])
sf_plays["play_num"] = range(1, len(sf_plays) + 1)
sf_plays["rolling_epa"] = sf_plays["epa"].rolling(window=50).mean()

# Get key week markers
key_weeks = [1, 8, 18]
annotations = []
for week in key_weeks:
    week_data = sf_plays[sf_plays["week"] == week]
    if not week_data.empty:
        annotations.append({
            "week": week,
            "play_num": week_data["play_num"].max(),
            "rolling_epa": week_data["rolling_epa"].dropna().iloc[-1] if not week_data["rolling_epa"].dropna().empty else 0
        })

# Create plot
fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(sf_plays["play_num"], sf_plays["rolling_epa"],
        color="#AA0000", linewidth=1.5, label="Rolling EPA")
ax.axhline(y=0, color="gray", linestyle="--", alpha=0.7)

# Add annotations
for ann in annotations:
    ax.scatter(ann["play_num"], ann["rolling_epa"], color="black", s=50, zorder=5)
    ax.annotate(f"Week {ann[\"week\"]}", (ann["play_num"], ann["rolling_epa"]),
                textcoords="offset points", xytext=(0, 10), ha="center")

ax.set_xlabel("Play Number")
ax.set_ylabel("Rolling EPA/Play (50-play average)")
ax.set_title("San Francisco 49ers - Rolling EPA/Play (2023)", fontweight="bold")

plt.tight_layout()
plt.savefig("rolling_epa.png", dpi=300)
plt.show()
Packages: nflfastR tidyverse zoo matplotlib pandas
Field Position Visualization
Create field diagrams showing drive starts and scoring.
Advanced
library(nflfastR)
library(tidyverse)

pbp <- load_pbp(2023)

# Get drive starting positions and outcomes
drive_data <- pbp %>%
  filter(!is.na(drive), !is.na(yardline_100)) %>%
  group_by(game_id, drive) %>%
  summarize(
    start_yard = first(yardline_100),
    result = last(fixed_drive_result),
    posteam = first(posteam),
    .groups = "drop"
  ) %>%
  filter(!is.na(result))

# Summarize by field position
field_summary <- drive_data %>%
  mutate(
    zone = case_when(
      start_yard >= 80 ~ "Own 0-20",
      start_yard >= 60 ~ "Own 20-40",
      start_yard >= 40 ~ "Midfield",
      start_yard >= 20 ~ "Opp 20-40",
      TRUE ~ "Red Zone"
    ),
    scored = result == "Touchdown"
  ) %>%
  group_by(zone) %>%
  summarize(
    drives = n(),
    tds = sum(scored),
    td_rate = mean(scored) * 100,
    .groups = "drop"
  )

# Create bar chart
ggplot(field_summary, aes(x = reorder(zone, -td_rate), y = td_rate, fill = td_rate)) +
  geom_col() +
  geom_text(aes(label = paste0(round(td_rate, 1), "%")), vjust = -0.5) +
  scale_fill_gradient(low = "#FFC107", high = "#28A745") +
  labs(
    title = "Touchdown Rate by Starting Field Position",
    x = "Starting Field Position",
    y = "TD Rate (%)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

ggsave("field_position.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

pbp = nfl.import_pbp_data([2023])

# Get drive starting positions and outcomes
drives = pbp[(pbp["drive"].notna()) & (pbp["yardline_100"].notna())]

drive_summary = (drives.groupby(["game_id", "drive"])
    .agg(
        start_yard=("yardline_100", "first"),
        result=("fixed_drive_result", "last")
    )
    .reset_index())

drive_summary = drive_summary[drive_summary["result"].notna()]

# Assign field zones
def get_zone(yard):
    if yard >= 80: return "Own 0-20"
    elif yard >= 60: return "Own 20-40"
    elif yard >= 40: return "Midfield"
    elif yard >= 20: return "Opp 20-40"
    else: return "Red Zone"

drive_summary["zone"] = drive_summary["start_yard"].apply(get_zone)
drive_summary["scored"] = drive_summary["result"] == "Touchdown"

# Calculate TD rate by zone
zone_stats = (drive_summary.groupby("zone")
    .agg(
        drives=("scored", "count"),
        tds=("scored", "sum"),
        td_rate=("scored", "mean")
    )
    .reset_index())

zone_stats["td_rate_pct"] = zone_stats["td_rate"] * 100
zone_stats = zone_stats.sort_values("td_rate_pct", ascending=False)

# Create bar chart
fig, ax = plt.subplots(figsize=(10, 6))

colors = plt.cm.RdYlGn(zone_stats["td_rate"])
bars = ax.bar(zone_stats["zone"], zone_stats["td_rate_pct"], color=colors)

for bar, rate in zip(bars, zone_stats["td_rate_pct"]):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
            f"{rate:.1f}%", ha="center", va="bottom")

ax.set_xlabel("Starting Field Position")
ax.set_ylabel("TD Rate (%)")
ax.set_title("Touchdown Rate by Starting Field Position", fontweight="bold")

plt.tight_layout()
plt.savefig("field_position.png", dpi=300)
plt.show()
Packages: nflfastR ggplot2 tidyverse matplotlib pandas
Animated Season Progress
Create animated visualizations showing team rankings over time.
Advanced
library(nflfastR)
library(tidyverse)
library(gganimate)
library(gifski)

pbp <- load_pbp(2023)

# Calculate cumulative EPA by week
weekly_cumulative <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
  group_by(posteam, week) %>%
  summarize(weekly_epa = mean(epa), .groups = "drop") %>%
  arrange(posteam, week) %>%
  group_by(posteam) %>%
  mutate(cumulative_epa = cumsum(weekly_epa) / row_number()) %>%
  ungroup()

# Add rankings
weekly_ranked <- weekly_cumulative %>%
  group_by(week) %>%
  mutate(rank = rank(-cumulative_epa)) %>%
  ungroup()

# Create animated bar chart race
p <- ggplot(weekly_ranked,
            aes(x = rank, y = cumulative_epa, fill = posteam)) +
  geom_col(width = 0.8) +
  geom_text(aes(label = posteam), hjust = -0.1, size = 4) +
  coord_flip(clip = "off") +
  scale_x_reverse() +
  labs(
    title = "NFL EPA Rankings Through Week {closest_state}",
    x = "Rank",
    y = "Cumulative EPA/Play"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  transition_states(week, transition_length = 4, state_length = 1)

# Render animation
animate(p, nframes = 200, fps = 20, width = 800, height = 600)
anim_save("epa_race.gif")
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import matplotlib.animation as animation

pbp = nfl.import_pbp_data([2023])

# Calculate cumulative EPA by week
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]

weekly_epa = (plays.groupby(["posteam", "week"])["epa"]
    .mean()
    .reset_index()
    .sort_values(["posteam", "week"]))

# Calculate cumulative average
weekly_epa["cumulative_epa"] = weekly_epa.groupby("posteam")["epa"].expanding().mean().reset_index(drop=True)

# Get rankings per week
weekly_epa["rank"] = weekly_epa.groupby("week")["cumulative_epa"].rank(ascending=False)

# Create static snapshot for a specific week (animation requires extra setup)
final_week = weekly_epa["week"].max()
final_data = weekly_epa[weekly_epa["week"] == final_week].sort_values("cumulative_epa", ascending=True)

fig, ax = plt.subplots(figsize=(10, 12))

colors = plt.cm.RdYlGn((final_data["cumulative_epa"] - final_data["cumulative_epa"].min()) /
                        (final_data["cumulative_epa"].max() - final_data["cumulative_epa"].min()))

bars = ax.barh(final_data["posteam"], final_data["cumulative_epa"], color=colors)

ax.set_xlabel("Cumulative EPA/Play")
ax.set_title(f"NFL EPA Rankings Through Week {final_week}", fontweight="bold")
ax.axvline(x=0, color="gray", linestyle="--", alpha=0.5)

for bar, epa in zip(bars, final_data["cumulative_epa"]):
    ax.text(epa + 0.005, bar.get_y() + bar.get_height()/2,
            f"{epa:.3f}", va="center", fontsize=8)

plt.tight_layout()
plt.savefig("epa_rankings.png", dpi=300)
plt.show()

print("Note: For true animation, use matplotlib FuncAnimation or plotly")
Packages: nflfastR tidyverse gganimate gifski matplotlib pandas
Quick Package Reference
R Packages
  • nflfastR - Play-by-play data with EPA
  • nflplotR - NFL team logos & plotting
  • tidyverse - Data manipulation & visualization
  • ggplot2 - Advanced visualizations
Python Packages
  • nfl_data_py - NFL data (nflverse compatible)
  • pandas - Data manipulation
  • matplotlib - Visualizations
  • scikit-learn - Machine learning

Ready to Dive Deeper?

Learn the theory behind these techniques in our comprehensive tutorial series

Browse Tutorials