Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.
Create publication-quality charts and visualizations for NFL data
library(nflfastR)
library(nflplotR)
library(tidyverse)
pbp <- load_pbp(2023)
# Calculate team EPA
team_epa <- pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
group_by(posteam) %>%
summarize(off_epa = mean(epa)) %>%
left_join(
pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
group_by(defteam) %>%
summarize(def_epa = mean(epa)),
by = c("posteam" = "defteam")
)
# Create plot with logos
ggplot(team_epa, aes(x = off_epa, y = def_epa)) +
geom_mean_lines(aes(y0 = def_epa, x0 = off_epa)) +
geom_nfl_logos(aes(team_abbr = posteam), width = 0.065) +
labs(
title = "NFL Team Efficiency (2023)",
subtitle = "Offensive vs Defensive EPA/Play",
x = "Offensive EPA/Play (higher is better)",
y = "Defensive EPA/Play (lower is better)",
caption = "Data: nflfastR"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
axis.title = element_text(size = 12)
)
ggsave("epa_scatter.png", width = 10, height = 8, dpi = 300)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
pbp = nfl.import_pbp_data([2023])
# Calculate team EPA
plays = pbp[(pbp["epa"].notna()) &
(pbp["play_type"].isin(["pass", "run"]))]
off_epa = plays.groupby("posteam")["epa"].mean().reset_index()
off_epa.columns = ["team", "off_epa"]
def_epa = plays.groupby("defteam")["epa"].mean().reset_index()
def_epa.columns = ["team", "def_epa"]
team_epa = off_epa.merge(def_epa, on="team")
# Create scatter plot
fig, ax = plt.subplots(figsize=(12, 10))
ax.scatter(team_epa["off_epa"], team_epa["def_epa"], s=100, alpha=0.7)
# Add team labels
for idx, row in team_epa.iterrows():
ax.annotate(row["team"], (row["off_epa"], row["def_epa"]),
fontsize=9, ha="center", va="bottom")
# Add mean lines
ax.axhline(y=team_epa["def_epa"].mean(), color="gray",
linestyle="--", alpha=0.5)
ax.axvline(x=team_epa["off_epa"].mean(), color="gray",
linestyle="--", alpha=0.5)
ax.set_xlabel("Offensive EPA/Play (higher is better)", fontsize=12)
ax.set_ylabel("Defensive EPA/Play (lower is better)", fontsize=12)
ax.set_title("NFL Team Efficiency (2023)", fontsize=16, fontweight="bold")
ax.invert_yaxis() # Lower def EPA is better
plt.tight_layout()
plt.savefig("epa_scatter.png", dpi=300)
plt.show()
nflfastR
nflplotR
ggplot2
tidyverse
matplotlib
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Get weekly EPA for specific teams
teams_of_interest <- c("KC", "SF", "BAL", "DET")
weekly_epa <- pbp %>%
filter(!is.na(epa), posteam %in% teams_of_interest) %>%
group_by(posteam, week) %>%
summarize(epa_per_play = mean(epa), .groups = "drop")
# Create line chart
ggplot(weekly_epa, aes(x = week, y = epa_per_play,
color = posteam, group = posteam)) +
geom_line(size = 1.2) +
geom_point(size = 2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("KC" = "#E31837", "SF" = "#AA0000",
"BAL" = "#241773", "DET" = "#0076B6")) +
labs(
title = "Weekly EPA/Play Trends (2023)",
x = "Week",
y = "EPA/Play",
color = "Team"
) +
theme_minimal() +
theme(legend.position = "bottom")
ggsave("weekly_epa.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
pbp = nfl.import_pbp_data([2023])
# Get weekly EPA for specific teams
teams = ["KC", "SF", "BAL", "DET"]
team_colors = {"KC": "#E31837", "SF": "#AA0000",
"BAL": "#241773", "DET": "#0076B6"}
plays = pbp[(pbp["epa"].notna()) & (pbp["posteam"].isin(teams))]
weekly_epa = plays.groupby(["posteam", "week"])["epa"].mean().reset_index()
# Create line chart
fig, ax = plt.subplots(figsize=(12, 6))
for team in teams:
team_data = weekly_epa[weekly_epa["posteam"] == team]
ax.plot(team_data["week"], team_data["epa"],
color=team_colors[team], label=team,
linewidth=2, marker="o", markersize=5)
ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
ax.set_xlabel("Week", fontsize=12)
ax.set_ylabel("EPA/Play", fontsize=12)
ax.set_title("Weekly EPA/Play Trends (2023)", fontsize=14, fontweight="bold")
ax.legend(loc="lower right")
plt.tight_layout()
plt.savefig("weekly_epa.png", dpi=300)
plt.show()
nflfastR
ggplot2
tidyverse
matplotlib
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Get pass data for a specific QB
qb_passes <- pbp %>%
filter(
passer_player_name == "P.Mahomes",
!is.na(air_yards),
!is.na(pass_location)
) %>%
mutate(
x = case_when(
pass_location == "left" ~ -10,
pass_location == "middle" ~ 0,
pass_location == "right" ~ 10
),
y = air_yards,
result = case_when(
interception == 1 ~ "INT",
complete_pass == 1 ~ "Complete",
TRUE ~ "Incomplete"
)
)
# Create pass chart
ggplot(qb_passes, aes(x = x, y = y, color = result)) +
geom_point(alpha = 0.6, size = 2) +
geom_hline(yintercept = 0, color = "darkgreen", size = 2) +
scale_color_manual(values = c("Complete" = "#00AA00",
"Incomplete" = "#AA0000",
"INT" = "#000000")) +
coord_cartesian(xlim = c(-20, 20), ylim = c(-5, 40)) +
labs(
title = "Patrick Mahomes Pass Chart (2023)",
x = "Field Width",
y = "Air Yards",
color = "Result"
) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
legend.position = "right"
)
ggsave("pass_chart.png", width = 8, height = 10)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
pbp = nfl.import_pbp_data([2023])
# Get pass data for a specific QB
qb_passes = pbp[
(pbp["passer_player_name"] == "P.Mahomes") &
(pbp["air_yards"].notna()) &
(pbp["pass_location"].notna())
].copy()
# Map pass location to x coordinate
location_map = {"left": -10, "middle": 0, "right": 10}
qb_passes["x"] = qb_passes["pass_location"].map(location_map)
qb_passes["y"] = qb_passes["air_yards"]
# Determine result
def get_result(row):
if row["interception"] == 1: return "INT"
elif row["complete_pass"] == 1: return "Complete"
else: return "Incomplete"
qb_passes["result"] = qb_passes.apply(get_result, axis=1)
# Create pass chart
fig, ax = plt.subplots(figsize=(8, 10))
colors = {"Complete": "#00AA00", "Incomplete": "#AA0000", "INT": "#000000"}
for result, color in colors.items():
data = qb_passes[qb_passes["result"] == result]
ax.scatter(data["x"], data["y"], c=color, label=result,
alpha=0.6, s=30)
ax.axhline(y=0, color="darkgreen", linewidth=3)
ax.set_xlim(-20, 20)
ax.set_ylim(-5, 40)
ax.set_xlabel("Field Width")
ax.set_ylabel("Air Yards")
ax.set_title("Patrick Mahomes Pass Chart (2023)", fontweight="bold")
ax.legend()
plt.tight_layout()
plt.savefig("pass_chart.png", dpi=300)
plt.show()
nflfastR
ggplot2
tidyverse
matplotlib
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Create EPA heatmap by down and distance bucket
heatmap_data <- pbp %>%
filter(!is.na(epa), down <= 4, !is.na(ydstogo)) %>%
mutate(
distance_bucket = case_when(
ydstogo <= 3 ~ "1-3",
ydstogo <= 6 ~ "4-6",
ydstogo <= 10 ~ "7-10",
TRUE ~ "10+"
)
) %>%
group_by(down, distance_bucket) %>%
summarize(
avg_epa = mean(epa),
plays = n(),
.groups = "drop"
)
# Create heatmap
ggplot(heatmap_data, aes(x = distance_bucket, y = factor(down), fill = avg_epa)) +
geom_tile(color = "white", size = 0.5) +
geom_text(aes(label = round(avg_epa, 3)), color = "white", size = 4) +
scale_fill_gradient2(low = "#BB0000", mid = "gray90", high = "#006400",
midpoint = 0, name = "EPA/Play") +
labs(
title = "EPA by Down and Distance (2023)",
x = "Yards to Go",
y = "Down"
) +
theme_minimal() +
theme(panel.grid = element_blank())
ggsave("epa_heatmap.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
pbp = nfl.import_pbp_data([2023])
# Filter valid plays
plays = pbp[(pbp["epa"].notna()) & (pbp["down"] <= 4) & (pbp["ydstogo"].notna())]
# Create distance buckets
def distance_bucket(yd):
if yd <= 3: return "1-3"
elif yd <= 6: return "4-6"
elif yd <= 10: return "7-10"
else: return "10+"
plays["distance_bucket"] = plays["ydstogo"].apply(distance_bucket)
# Calculate EPA by down and distance
heatmap_data = plays.groupby(["down", "distance_bucket"])["epa"].mean().reset_index()
pivot_data = heatmap_data.pivot(index="down", columns="distance_bucket", values="epa")
# Reorder columns
col_order = ["1-3", "4-6", "7-10", "10+"]
pivot_data = pivot_data[[c for c in col_order if c in pivot_data.columns]]
# Create heatmap
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(pivot_data, annot=True, fmt=".3f", cmap="RdYlGn", center=0,
linewidths=0.5, ax=ax, cbar_kws={"label": "EPA/Play"})
ax.set_xlabel("Yards to Go")
ax.set_ylabel("Down")
ax.set_title("EPA by Down and Distance (2023)", fontweight="bold")
plt.tight_layout()
plt.savefig("epa_heatmap.png", dpi=300)
plt.show()
nflfastR
ggplot2
tidyverse
seaborn
matplotlib
pandas
library(nflfastR)
library(tidyverse)
library(fmsb) # For radar charts
pbp <- load_pbp(2023)
# Calculate QB metrics
qb_metrics <- pbp %>%
filter(!is.na(passer_player_id), play_type == "pass") %>%
group_by(passer_player_name) %>%
summarize(
attempts = n(),
epa_play = mean(epa, na.rm = TRUE),
cpoe = mean(cpoe, na.rm = TRUE),
avg_air = mean(air_yards, na.rm = TRUE),
deep_pct = mean(air_yards >= 20, na.rm = TRUE) * 100,
sack_rate = mean(sack == 1, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(attempts >= 300)
# Normalize to 0-100 scale for radar
normalize <- function(x) (x - min(x)) / (max(x) - min(x)) * 100
qb_radar <- qb_metrics %>%
mutate(across(c(epa_play, cpoe, avg_air, deep_pct), normalize)) %>%
mutate(sack_rate = 100 - normalize(sack_rate)) # Invert - lower is better
# Get top QBs for comparison
top_qbs <- c("P.Mahomes", "J.Allen", "L.Jackson")
radar_data <- qb_radar %>%
filter(passer_player_name %in% top_qbs) %>%
select(passer_player_name, epa_play, cpoe, avg_air, deep_pct, sack_rate)
print(radar_data)
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import pi
pbp = nfl.import_pbp_data([2023])
# Calculate QB metrics
passes = pbp[(pbp["passer_player_id"].notna()) & (pbp["play_type"] == "pass")]
qb_metrics = (passes.groupby("passer_player_name")
.agg(
attempts=("play_id", "count"),
epa_play=("epa", "mean"),
cpoe=("cpoe", "mean"),
avg_air=("air_yards", "mean"),
deep_pct=("air_yards", lambda x: (x >= 20).mean() * 100),
sack_rate=("sack", "mean")
)
.reset_index())
qb_metrics = qb_metrics[qb_metrics["attempts"] >= 300]
# Normalize to 0-100
def normalize(col):
return (col - col.min()) / (col.max() - col.min()) * 100
for col in ["epa_play", "cpoe", "avg_air", "deep_pct"]:
qb_metrics[col + "_norm"] = normalize(qb_metrics[col])
# Invert sack rate (lower is better)
qb_metrics["sack_norm"] = 100 - normalize(qb_metrics["sack_rate"] * 100)
# Create radar chart for top QBs
categories = ["EPA/Play", "CPOE", "Air Yards", "Deep%", "Sack Avoidance"]
qbs = ["P.Mahomes", "J.Allen", "L.Jackson"]
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]
for qb in qbs:
data = qb_metrics[qb_metrics["passer_player_name"] == qb]
if not data.empty:
values = [data["epa_play_norm"].iloc[0], data["cpoe_norm"].iloc[0],
data["avg_air_norm"].iloc[0], data["deep_pct_norm"].iloc[0],
data["sack_norm"].iloc[0]]
values += values[:1]
ax.plot(angles, values, linewidth=2, label=qb)
ax.fill(angles, values, alpha=0.1)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories)
ax.set_title("QB Comparison Radar Chart", fontweight="bold")
ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.0))
plt.tight_layout()
plt.savefig("qb_radar.png", dpi=300)
plt.show()
nflfastR
tidyverse
fmsb
matplotlib
pandas
numpy
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# EPA distribution by play type
play_epa <- pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run"))
# Create box plot
ggplot(play_epa, aes(x = play_type, y = epa, fill = play_type)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
coord_cartesian(ylim = c(-3, 3)) +
scale_fill_manual(values = c("pass" = "#1E90FF", "run" = "#228B22")) +
labs(
title = "EPA Distribution by Play Type (2023)",
x = "Play Type",
y = "EPA",
fill = "Type"
) +
theme_minimal() +
theme(legend.position = "none")
ggsave("epa_boxplot.png", width = 8, height = 6)
# Box plot by down
ggplot(play_epa %>% filter(!is.na(down), down <= 4),
aes(x = factor(down), y = epa, fill = factor(down))) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
coord_cartesian(ylim = c(-2, 2)) +
labs(title = "EPA by Down", x = "Down", y = "EPA") +
theme_minimal() +
theme(legend.position = "none")
ggsave("epa_by_down.png", width = 8, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
pbp = nfl.import_pbp_data([2023])
# Filter plays with EPA
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]
# Create box plot by play type
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
# Plot 1: By play type
sns.boxplot(data=plays, x="play_type", y="epa", ax=axes[0],
palette={"pass": "#1E90FF", "run": "#228B22"},
showfliers=False)
axes[0].set_ylim(-3, 3)
axes[0].set_xlabel("Play Type")
axes[0].set_ylabel("EPA")
axes[0].set_title("EPA Distribution by Play Type (2023)")
# Plot 2: By down
plays_by_down = plays[(plays["down"].notna()) & (plays["down"] <= 4)]
sns.boxplot(data=plays_by_down, x="down", y="epa", ax=axes[1],
palette="viridis", showfliers=False)
axes[1].set_ylim(-2, 2)
axes[1].set_xlabel("Down")
axes[1].set_ylabel("EPA")
axes[1].set_title("EPA Distribution by Down (2023)")
plt.tight_layout()
plt.savefig("epa_boxplots.png", dpi=300)
plt.show()
nflfastR
ggplot2
tidyverse
seaborn
matplotlib
pandas
library(nflfastR)
library(tidyverse)
library(plotly)
pbp <- load_pbp(2023)
# Calculate team stats
team_stats <- pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
group_by(posteam) %>%
summarize(
off_epa = mean(epa),
pass_rate = mean(play_type == "pass") * 100,
plays = n(),
.groups = "drop"
) %>%
left_join(
pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
group_by(defteam) %>%
summarize(def_epa = mean(epa)),
by = c("posteam" = "defteam")
)
# Create interactive scatter plot
p <- plot_ly(team_stats,
x = ~off_epa,
y = ~def_epa,
text = ~paste("Team:", posteam,
"<br>Off EPA:", round(off_epa, 3),
"<br>Def EPA:", round(def_epa, 3),
"<br>Pass Rate:", round(pass_rate, 1), "%"),
hoverinfo = "text",
type = "scatter",
mode = "markers",
marker = list(size = 12, opacity = 0.8)
) %>%
layout(
title = "NFL Team Efficiency (2023)",
xaxis = list(title = "Offensive EPA/Play"),
yaxis = list(title = "Defensive EPA/Play", autorange = "reversed")
)
# Save or display
htmlwidgets::saveWidget(p, "team_efficiency.html")
import nfl_data_py as nfl
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
pbp = nfl.import_pbp_data([2023])
# Calculate team stats
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]
off_stats = (plays.groupby("posteam")
.agg(
off_epa=("epa", "mean"),
pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
plays=("epa", "count")
)
.reset_index())
def_stats = (plays.groupby("defteam")
.agg(def_epa=("epa", "mean"))
.reset_index())
team_stats = off_stats.merge(def_stats, left_on="posteam", right_on="defteam")
# Create interactive scatter with Plotly
fig = px.scatter(
team_stats,
x="off_epa",
y="def_epa",
text="posteam",
hover_data={"off_epa": ":.3f", "def_epa": ":.3f", "pass_rate": ":.1f"},
title="NFL Team Efficiency (2023)"
)
fig.update_traces(textposition="top center", marker=dict(size=12))
fig.update_yaxes(autorange="reversed") # Lower def EPA is better
fig.update_layout(
xaxis_title="Offensive EPA/Play (higher is better)",
yaxis_title="Defensive EPA/Play (lower is better)"
)
fig.write_html("team_efficiency.html")
fig.show()
nflfastR
tidyverse
plotly
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# EPA histogram
epa_data <- pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run"))
ggplot(epa_data, aes(x = epa, fill = play_type)) +
geom_histogram(bins = 50, alpha = 0.6, position = "identity") +
scale_fill_manual(values = c("pass" = "#1E90FF", "run" = "#228B22")) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
coord_cartesian(xlim = c(-5, 5)) +
labs(
title = "Distribution of EPA by Play Type (2023)",
x = "Expected Points Added (EPA)",
y = "Count",
fill = "Play Type"
) +
theme_minimal()
ggsave("epa_histogram.png", width = 10, height = 6)
# Air yards histogram
air_data <- pbp %>%
filter(!is.na(air_yards), play_type == "pass")
ggplot(air_data, aes(x = air_yards)) +
geom_histogram(bins = 40, fill = "#1E90FF", alpha = 0.7) +
geom_vline(aes(xintercept = mean(air_yards)), color = "red",
linetype = "dashed", size = 1) +
labs(
title = "Distribution of Air Yards (2023)",
x = "Air Yards",
y = "Count",
caption = "Red line = mean"
) +
theme_minimal()
ggsave("air_yards_histogram.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
pbp = nfl.import_pbp_data([2023])
# EPA histogram by play type
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Plot 1: EPA distribution
for play_type, color in [("pass", "#1E90FF"), ("run", "#228B22")]:
data = plays[plays["play_type"] == play_type]["epa"]
axes[0].hist(data, bins=50, alpha=0.6, color=color, label=play_type.capitalize())
axes[0].axvline(x=0, color="black", linestyle="--")
axes[0].set_xlim(-5, 5)
axes[0].set_xlabel("EPA")
axes[0].set_ylabel("Count")
axes[0].set_title("Distribution of EPA by Play Type (2023)")
axes[0].legend()
# Plot 2: Air yards distribution
air_data = pbp[(pbp["air_yards"].notna()) & (pbp["play_type"] == "pass")]
axes[1].hist(air_data["air_yards"], bins=40, color="#1E90FF", alpha=0.7)
mean_air = air_data["air_yards"].mean()
axes[1].axvline(x=mean_air, color="red", linestyle="--", label=f"Mean: {mean_air:.1f}")
axes[1].set_xlabel("Air Yards")
axes[1].set_ylabel("Count")
axes[1].set_title("Distribution of Air Yards (2023)")
axes[1].legend()
plt.tight_layout()
plt.savefig("histograms.png", dpi=300)
plt.show()
nflfastR
ggplot2
tidyverse
matplotlib
pandas
library(nflfastR)
library(tidyverse)
library(corrplot)
pbp <- load_pbp(2023)
# Calculate team metrics for correlation
team_metrics <- pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
group_by(posteam) %>%
summarize(
off_epa = mean(epa),
pass_rate = mean(play_type == "pass") * 100,
pass_epa = mean(epa[play_type == "pass"]),
run_epa = mean(epa[play_type == "run"]),
success_rate = mean(success, na.rm = TRUE) * 100,
explosive_rate = mean(epa > 2, na.rm = TRUE) * 100,
.groups = "drop"
)
# Create correlation matrix
cor_matrix <- team_metrics %>%
select(-posteam) %>%
cor(use = "complete.obs")
# Visualize
corrplot(cor_matrix,
method = "color",
type = "upper",
addCoef.col = "black",
number.cex = 0.7,
tl.col = "black",
title = "Team Offensive Metrics Correlation",
mar = c(0, 0, 2, 0))
png("correlation_matrix.png", width = 800, height = 800)
corrplot(cor_matrix, method = "color", type = "upper",
addCoef.col = "black", number.cex = 0.8)
dev.off()
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
pbp = nfl.import_pbp_data([2023])
# Calculate team metrics
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]
def calculate_metrics(group):
return pd.Series({
"off_epa": group["epa"].mean(),
"pass_rate": (group["play_type"] == "pass").mean() * 100,
"pass_epa": group[group["play_type"] == "pass"]["epa"].mean(),
"run_epa": group[group["play_type"] == "run"]["epa"].mean(),
"success_rate": group["success"].mean() * 100,
"explosive_rate": (group["epa"] > 2).mean() * 100
})
team_metrics = plays.groupby("posteam").apply(calculate_metrics).reset_index()
# Calculate correlation matrix
metrics_only = team_metrics.drop("posteam", axis=1)
cor_matrix = metrics_only.corr()
# Create heatmap
fig, ax = plt.subplots(figsize=(10, 8))
mask = np.triu(np.ones_like(cor_matrix, dtype=bool))
sns.heatmap(cor_matrix, mask=mask, annot=True, fmt=".2f",
cmap="coolwarm", center=0, square=True,
linewidths=0.5, ax=ax)
ax.set_title("Team Offensive Metrics Correlation", fontweight="bold")
plt.tight_layout()
plt.savefig("correlation_matrix.png", dpi=300)
plt.show()
nflfastR
tidyverse
corrplot
seaborn
matplotlib
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Calculate rolling EPA for a team
team_rolling <- pbp %>%
filter(posteam == "SF", !is.na(epa), play_type %in% c("pass", "run")) %>%
arrange(game_id, play_id) %>%
mutate(
play_num = row_number(),
rolling_epa = zoo::rollmean(epa, k = 50, fill = NA, align = "right")
)
# Define key game events
key_games <- team_rolling %>%
group_by(game_id, week) %>%
summarize(
play_num = max(play_num),
rolling_epa = last(na.omit(rolling_epa)),
.groups = "drop"
) %>%
filter(week %in% c(1, 8, 18)) # Annotate select weeks
# Create time series with annotations
ggplot(team_rolling, aes(x = play_num, y = rolling_epa)) +
geom_line(color = "#AA0000", size = 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_point(data = key_games, color = "black", size = 3) +
geom_text(data = key_games, aes(label = paste("Week", week)),
vjust = -1.5, size = 3) +
labs(
title = "San Francisco 49ers - Rolling EPA/Play (2023)",
subtitle = "50-play rolling average",
x = "Play Number",
y = "Rolling EPA/Play"
) +
theme_minimal()
ggsave("rolling_epa.png", width = 12, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
pbp = nfl.import_pbp_data([2023])
# Calculate rolling EPA for SF
sf_plays = pbp[(pbp["posteam"] == "SF") &
(pbp["epa"].notna()) &
(pbp["play_type"].isin(["pass", "run"]))].copy()
sf_plays = sf_plays.sort_values(["game_id", "play_id"])
sf_plays["play_num"] = range(1, len(sf_plays) + 1)
sf_plays["rolling_epa"] = sf_plays["epa"].rolling(window=50).mean()
# Get key week markers
key_weeks = [1, 8, 18]
annotations = []
for week in key_weeks:
week_data = sf_plays[sf_plays["week"] == week]
if not week_data.empty:
annotations.append({
"week": week,
"play_num": week_data["play_num"].max(),
"rolling_epa": week_data["rolling_epa"].dropna().iloc[-1] if not week_data["rolling_epa"].dropna().empty else 0
})
# Create plot
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(sf_plays["play_num"], sf_plays["rolling_epa"],
color="#AA0000", linewidth=1.5, label="Rolling EPA")
ax.axhline(y=0, color="gray", linestyle="--", alpha=0.7)
# Add annotations
for ann in annotations:
ax.scatter(ann["play_num"], ann["rolling_epa"], color="black", s=50, zorder=5)
ax.annotate(f"Week {ann[\"week\"]}", (ann["play_num"], ann["rolling_epa"]),
textcoords="offset points", xytext=(0, 10), ha="center")
ax.set_xlabel("Play Number")
ax.set_ylabel("Rolling EPA/Play (50-play average)")
ax.set_title("San Francisco 49ers - Rolling EPA/Play (2023)", fontweight="bold")
plt.tight_layout()
plt.savefig("rolling_epa.png", dpi=300)
plt.show()
nflfastR
tidyverse
zoo
matplotlib
pandas
library(nflfastR)
library(tidyverse)
pbp <- load_pbp(2023)
# Get drive starting positions and outcomes
drive_data <- pbp %>%
filter(!is.na(drive), !is.na(yardline_100)) %>%
group_by(game_id, drive) %>%
summarize(
start_yard = first(yardline_100),
result = last(fixed_drive_result),
posteam = first(posteam),
.groups = "drop"
) %>%
filter(!is.na(result))
# Summarize by field position
field_summary <- drive_data %>%
mutate(
zone = case_when(
start_yard >= 80 ~ "Own 0-20",
start_yard >= 60 ~ "Own 20-40",
start_yard >= 40 ~ "Midfield",
start_yard >= 20 ~ "Opp 20-40",
TRUE ~ "Red Zone"
),
scored = result == "Touchdown"
) %>%
group_by(zone) %>%
summarize(
drives = n(),
tds = sum(scored),
td_rate = mean(scored) * 100,
.groups = "drop"
)
# Create bar chart
ggplot(field_summary, aes(x = reorder(zone, -td_rate), y = td_rate, fill = td_rate)) +
geom_col() +
geom_text(aes(label = paste0(round(td_rate, 1), "%")), vjust = -0.5) +
scale_fill_gradient(low = "#FFC107", high = "#28A745") +
labs(
title = "Touchdown Rate by Starting Field Position",
x = "Starting Field Position",
y = "TD Rate (%)"
) +
theme_minimal() +
theme(legend.position = "none")
ggsave("field_position.png", width = 10, height = 6)
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
pbp = nfl.import_pbp_data([2023])
# Get drive starting positions and outcomes
drives = pbp[(pbp["drive"].notna()) & (pbp["yardline_100"].notna())]
drive_summary = (drives.groupby(["game_id", "drive"])
.agg(
start_yard=("yardline_100", "first"),
result=("fixed_drive_result", "last")
)
.reset_index())
drive_summary = drive_summary[drive_summary["result"].notna()]
# Assign field zones
def get_zone(yard):
if yard >= 80: return "Own 0-20"
elif yard >= 60: return "Own 20-40"
elif yard >= 40: return "Midfield"
elif yard >= 20: return "Opp 20-40"
else: return "Red Zone"
drive_summary["zone"] = drive_summary["start_yard"].apply(get_zone)
drive_summary["scored"] = drive_summary["result"] == "Touchdown"
# Calculate TD rate by zone
zone_stats = (drive_summary.groupby("zone")
.agg(
drives=("scored", "count"),
tds=("scored", "sum"),
td_rate=("scored", "mean")
)
.reset_index())
zone_stats["td_rate_pct"] = zone_stats["td_rate"] * 100
zone_stats = zone_stats.sort_values("td_rate_pct", ascending=False)
# Create bar chart
fig, ax = plt.subplots(figsize=(10, 6))
colors = plt.cm.RdYlGn(zone_stats["td_rate"])
bars = ax.bar(zone_stats["zone"], zone_stats["td_rate_pct"], color=colors)
for bar, rate in zip(bars, zone_stats["td_rate_pct"]):
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
f"{rate:.1f}%", ha="center", va="bottom")
ax.set_xlabel("Starting Field Position")
ax.set_ylabel("TD Rate (%)")
ax.set_title("Touchdown Rate by Starting Field Position", fontweight="bold")
plt.tight_layout()
plt.savefig("field_position.png", dpi=300)
plt.show()
nflfastR
ggplot2
tidyverse
matplotlib
pandas
library(nflfastR)
library(tidyverse)
library(gganimate)
library(gifski)
pbp <- load_pbp(2023)
# Calculate cumulative EPA by week
weekly_cumulative <- pbp %>%
filter(!is.na(epa), play_type %in% c("pass", "run")) %>%
group_by(posteam, week) %>%
summarize(weekly_epa = mean(epa), .groups = "drop") %>%
arrange(posteam, week) %>%
group_by(posteam) %>%
mutate(cumulative_epa = cumsum(weekly_epa) / row_number()) %>%
ungroup()
# Add rankings
weekly_ranked <- weekly_cumulative %>%
group_by(week) %>%
mutate(rank = rank(-cumulative_epa)) %>%
ungroup()
# Create animated bar chart race
p <- ggplot(weekly_ranked,
aes(x = rank, y = cumulative_epa, fill = posteam)) +
geom_col(width = 0.8) +
geom_text(aes(label = posteam), hjust = -0.1, size = 4) +
coord_flip(clip = "off") +
scale_x_reverse() +
labs(
title = "NFL EPA Rankings Through Week {closest_state}",
x = "Rank",
y = "Cumulative EPA/Play"
) +
theme_minimal() +
theme(legend.position = "none") +
transition_states(week, transition_length = 4, state_length = 1)
# Render animation
animate(p, nframes = 200, fps = 20, width = 800, height = 600)
anim_save("epa_race.gif")
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import matplotlib.animation as animation
pbp = nfl.import_pbp_data([2023])
# Calculate cumulative EPA by week
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]
weekly_epa = (plays.groupby(["posteam", "week"])["epa"]
.mean()
.reset_index()
.sort_values(["posteam", "week"]))
# Calculate cumulative average
weekly_epa["cumulative_epa"] = weekly_epa.groupby("posteam")["epa"].expanding().mean().reset_index(drop=True)
# Get rankings per week
weekly_epa["rank"] = weekly_epa.groupby("week")["cumulative_epa"].rank(ascending=False)
# Create static snapshot for a specific week (animation requires extra setup)
final_week = weekly_epa["week"].max()
final_data = weekly_epa[weekly_epa["week"] == final_week].sort_values("cumulative_epa", ascending=True)
fig, ax = plt.subplots(figsize=(10, 12))
colors = plt.cm.RdYlGn((final_data["cumulative_epa"] - final_data["cumulative_epa"].min()) /
(final_data["cumulative_epa"].max() - final_data["cumulative_epa"].min()))
bars = ax.barh(final_data["posteam"], final_data["cumulative_epa"], color=colors)
ax.set_xlabel("Cumulative EPA/Play")
ax.set_title(f"NFL EPA Rankings Through Week {final_week}", fontweight="bold")
ax.axvline(x=0, color="gray", linestyle="--", alpha=0.5)
for bar, epa in zip(bars, final_data["cumulative_epa"]):
ax.text(epa + 0.005, bar.get_y() + bar.get_height()/2,
f"{epa:.3f}", va="center", fontsize=8)
plt.tight_layout()
plt.savefig("epa_rankings.png", dpi=300)
plt.show()
print("Note: For true animation, use matplotlib FuncAnimation or plotly")
nflfastR
tidyverse
gganimate
gifski
matplotlib
pandas
nflfastR - Play-by-play data with EPAnflplotR - NFL team logos & plottingtidyverse - Data manipulation & visualizationggplot2 - Advanced visualizationsnfl_data_py - NFL data (nflverse compatible)pandas - Data manipulationmatplotlib - Visualizationsscikit-learn - Machine learningLearn the theory behind these techniques in our comprehensive tutorial series
Browse Tutorials