Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.
Analyze decision-making in key game situations
library(nflfastR)
library(tidyverse)
# Fourth-down decision analysis for the 2023 season.
pbp <- load_pbp(2023)

# Fourth down decisions: label each fourth-down play by what the offense
# chose to do. Play types other than pass/run/field_goal/punt are dropped.
fourth_downs <- pbp %>%
  filter(down == 4, !is.na(play_type)) %>%
  mutate(
    decision = case_when(
      play_type %in% c("pass", "run") ~ "Go for it",
      play_type == "field_goal" ~ "Field Goal",
      play_type == "punt" ~ "Punt",
      TRUE ~ "Other"
    )
  ) %>%
  filter(decision != "Other")

# Decision by field position. yardline_100 is the distance to the opponent's
# end zone. Rows without a yard line are removed first; otherwise case_when()
# would silently send them to the "Own territory" fallback.
decision_analysis <- fourth_downs %>%
  filter(!is.na(yardline_100)) %>%
  mutate(
    zone = case_when(
      yardline_100 <= 3 ~ "Goal line (1-3)",
      yardline_100 <= 10 ~ "Red zone (4-10)",
      yardline_100 <= 40 ~ "Opp territory",
      yardline_100 <= 60 ~ "Midfield",
      TRUE ~ "Own territory"
    )
  ) %>%
  count(zone, decision, name = "plays") %>%
  pivot_wider(names_from = decision, values_from = plays, values_fill = 0)

# cat() is used for headers because print() shows "\n" as a literal escape.
cat("Fourth Down Decisions by Field Position:\n")
print(decision_analysis)

# Go-for-it success rate. A touchdown only counts as a conversion when the
# offense scored it: `touchdown == 1` alone is also set on defensive return
# touchdowns (e.g. a pick-six), which are failed attempts.
go_for_it <- fourth_downs %>%
  filter(decision == "Go for it") %>%
  mutate(
    converted = first_down == 1 | (touchdown == 1 & td_team == posteam)
  ) %>%
  summarize(
    attempts = n(),
    conversions = sum(converted, na.rm = TRUE),
    success_rate = mean(converted, na.rm = TRUE) * 100
  )

cat("\nGo-for-it Success Rate:\n")
print(go_for_it)
import nfl_data_py as nfl
import pandas as pd
# Fourth-down decision analysis for the 2023 season.
pbp = nfl.import_pbp_data([2023])

# Fourth down decisions
fourth = pbp[(pbp["down"] == 4) & (pbp["play_type"].notna())].copy()


def get_decision(play_type):
    """Map a play type to the fourth-down decision it represents."""
    if play_type in ["pass", "run"]:
        return "Go for it"
    elif play_type == "field_goal":
        return "Field Goal"
    elif play_type == "punt":
        return "Punt"
    else:
        return "Other"


fourth["decision"] = fourth["play_type"].apply(get_decision)
fourth = fourth[fourth["decision"] != "Other"]


# Decision by field position
def get_zone(yd):
    """Bucket yardline_100 (distance to the opponent's end zone) into a zone.

    Returns None for a missing yard line so the row is left out of the
    grouped table instead of falling through to "Own territory".
    """
    if pd.isna(yd):
        return None
    if yd <= 3:
        return "Goal line (1-3)"
    elif yd <= 10:
        return "Red zone (4-10)"
    elif yd <= 40:
        return "Opp territory"
    elif yd <= 60:
        return "Midfield"
    else:
        return "Own territory"


fourth["zone"] = fourth["yardline_100"].apply(get_zone)

# unstack(fill_value=0) keeps the counts as integers; pivot + fillna(0)
# would turn them into floats.
decision_analysis = (
    fourth.groupby(["zone", "decision"])
    .size()
    .unstack(fill_value=0)
)

print("Fourth Down Decisions by Field Position:")
print(decision_analysis)

# Go-for-it success rate. A touchdown only counts as a conversion when the
# offense scored it: touchdown == 1 alone is also set on defensive return
# touchdowns (e.g. a pick-six), which are failed attempts.
go_for_it = fourth[fourth["decision"] == "Go for it"]
offense_td = (go_for_it["touchdown"] == 1) & (
    go_for_it["td_team"] == go_for_it["posteam"]
)
converted = (go_for_it["first_down"] == 1) | offense_td
success_rate = converted.mean() * 100
print(f"\nGo-for-it Success Rate: {success_rate:.1f}%")
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Two-point conversion analysis, 2019-2023 seasons.
pbp <- load_pbp(2019:2023)

# Keep only attempts with a recorded result. Dropping NA results once, up
# front, means every table below uses the same denominator and none of the
# summaries can collapse to NA.
two_point <- pbp %>%
  filter(two_point_attempt == 1, !is.na(two_point_conv_result)) %>%
  mutate(converted = two_point_conv_result == "success")

# Overall success rate
overall <- two_point %>%
  summarize(
    attempts = n(),
    successes = sum(converted),
    success_rate = mean(converted) * 100
  )

# cat() is used for headers because print() shows "\n" as a literal escape.
cat("Overall Two-Point Conversion Success:\n")
print(overall)

# By play type
by_type <- two_point %>%
  filter(play_type %in% c("pass", "run")) %>%
  group_by(play_type) %>%
  summarize(
    attempts = n(),
    successes = sum(converted),
    success_rate = mean(converted) * 100,
    .groups = "drop"
  )

cat("\nBy Play Type:\n")
print(by_type)

# Team leaders (minimum 5 attempts so tiny samples do not top the list)
team_2pt <- two_point %>%
  group_by(posteam) %>%
  summarize(
    attempts = n(),
    successes = sum(converted),
    success_rate = mean(converted) * 100,
    .groups = "drop"
  ) %>%
  filter(attempts >= 5) %>%
  arrange(desc(success_rate))

cat("\nTeam Leaders (min. 5 attempts):\n")
print(team_2pt)
import nfl_data_py as nfl
import pandas as pd
# Two-point conversion analysis, 2019-2023 seasons.
pbp = nfl.import_pbp_data([2019, 2020, 2021, 2022, 2023])

# Two-point conversions. Attempts without a recorded result are dropped once,
# up front, so every table below uses the same denominator (otherwise a
# missing result counts as a failure in one place and is skipped in another).
two_point = pbp[
    (pbp["two_point_attempt"] == 1) & pbp["two_point_conv_result"].notna()
].copy()
two_point["converted"] = two_point["two_point_conv_result"] == "success"

# Overall success rate
success = two_point["converted"]
print(
    f"Overall 2PT Success Rate: {success.mean() * 100:.1f}% "
    f"({success.sum()}/{len(two_point)})"
)

# By play type
by_type = (
    two_point[two_point["play_type"].isin(["pass", "run"])]
    .groupby("play_type")
    .agg(
        attempts=("converted", "size"),
        successes=("converted", "sum"),
        success_rate=("converted", lambda x: x.mean() * 100),
    )
    .reset_index()
)

print("\nBy Play Type:")
print(by_type)

# Team leaders (minimum 5 attempts so tiny samples do not top the list)
team_2pt = (
    two_point.groupby("posteam")
    .agg(
        attempts=("converted", "size"),
        successes=("converted", "sum"),
        success_rate=("converted", lambda x: x.mean() * 100),
    )
    .reset_index()
)
team_2pt = team_2pt[team_2pt["attempts"] >= 5].sort_values(
    "success_rate", ascending=False
)

print("\nTeam Leaders (min. 5 attempts):")
print(team_2pt)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Late-game play selection for the 2023 season.
pbp <- load_pbp(2023)

# Late game clock management: pass/run plays in the last 5 minutes of the
# 4th quarter with the score within one possession (8 points).
late_game <- pbp %>%
  filter(
    qtr == 4,
    game_seconds_remaining <= 300, # Last 5 minutes
    abs(score_differential) <= 8,
    play_type %in% c("pass", "run")
  )

# Play selection by score differential (from the offense's point of view)
late_plays <- late_game %>%
  mutate(
    situation = case_when(
      score_differential > 0 ~ "Leading",
      score_differential < 0 ~ "Trailing",
      TRUE ~ "Tied"
    )
  ) %>%
  group_by(situation) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    # NOTE(review): play_clock appears to be delivered as a character
    # column; as.numeric() is a no-op if it is already numeric and prevents
    # mean() from returning NA with a warning if it is not.
    avg_play_clock = mean(as.numeric(play_clock), na.rm = TRUE),
    .groups = "drop"
  )

print("Late Game Play Selection:")
print(late_plays)
import nfl_data_py as nfl
import pandas as pd
# Late-game play selection for the 2023 season.
pbp = nfl.import_pbp_data([2023])

# Late game clock management: pass/run plays in the last 5 minutes of the
# 4th quarter with the score within one possession (8 points).
late_game = pbp[
    (pbp["qtr"] == 4)
    & (pbp["game_seconds_remaining"] <= 300)
    & (pbp["score_differential"].abs() <= 8)
    & (pbp["play_type"].isin(["pass", "run"]))
].copy()


def get_situation(diff):
    """Describe the offense's score situation from its score differential."""
    if diff > 0:
        return "Leading"
    elif diff < 0:
        return "Trailing"
    else:
        return "Tied"


late_game["situation"] = late_game["score_differential"].apply(get_situation)

# NOTE(review): play_clock may arrive as strings; coerce so the mean is
# numeric (values that cannot be parsed become NaN and are skipped).
late_game["play_clock"] = pd.to_numeric(late_game["play_clock"], errors="coerce")

late_plays = (
    late_game.groupby("situation")
    .agg(
        # "size" counts every play; counting "epa" would silently skip
        # plays whose EPA is missing.
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100),
        avg_play_clock=("play_clock", "mean"),
    )
    .reset_index()
)

print("Late Game Play Selection:")
print(late_plays)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Timeout usage analysis for the 2023 season.
pbp <- load_pbp(2023)

# Timeout usage by situation. Quarter 5 is overtime, so it gets its own
# label instead of being lumped into the second half.
timeouts <- pbp %>%
  filter(timeout == 1) %>%
  mutate(
    half = case_when(
      qtr <= 2 ~ "First Half",
      qtr <= 4 ~ "Second Half",
      TRUE ~ "Overtime"
    ),
    close_game = abs(score_differential) <= 8
  )

# Timeout distribution
timeout_dist <- timeouts %>%
  count(half, qtr, name = "timeouts")

# cat() is used for headers because print() shows "\n" as a literal escape.
cat("Timeout Distribution by Quarter:\n")
print(timeout_dist)

# Team timeout usage patterns. `close_games` counts timeouts taken while the
# score was within 8 points (it is a count of timeouts, not of games).
team_timeouts <- timeouts %>%
  group_by(timeout_team) %>%
  summarize(
    total_timeouts = n(),
    first_half = sum(half == "First Half"),
    second_half = sum(half == "Second Half"),
    overtime = sum(half == "Overtime"),
    # na.rm: a missing score differential must not turn the count into NA
    close_games = sum(close_game, na.rm = TRUE),
    avg_time_remaining = mean(game_seconds_remaining, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_timeouts))

cat("\nTeam Timeout Usage:\n")
print(team_timeouts)
import nfl_data_py as nfl
import pandas as pd
# Timeout usage analysis for the 2023 season.
pbp = nfl.import_pbp_data([2023])


def get_half(qtr):
    """Label a quarter number as first half, second half, or overtime."""
    if qtr <= 2:
        return "First Half"
    if qtr <= 4:
        return "Second Half"
    # Quarter 5 is overtime; it is not part of the second half.
    return "Overtime"


# Filter timeouts
timeouts = pbp[pbp["timeout"] == 1].copy()
timeouts["half"] = timeouts["qtr"].apply(get_half)
timeouts["close_game"] = timeouts["score_differential"].abs() <= 8

# Distribution by quarter
timeout_dist = (
    timeouts.groupby(["half", "qtr"])
    .size()
    .reset_index(name="timeouts")
)

print("Timeout Distribution by Quarter:")
print(timeout_dist)

# Team patterns. close_games counts timeouts taken while the score was
# within 8 points (it is a count of timeouts, not of games).
team_timeouts = (
    timeouts.groupby("timeout_team")
    .agg(
        total_timeouts=("timeout", "count"),
        first_half=("half", lambda x: (x == "First Half").sum()),
        second_half=("half", lambda x: (x == "Second Half").sum()),
        overtime=("half", lambda x: (x == "Overtime").sum()),
        close_games=("close_game", "sum"),
        avg_time_remaining=("game_seconds_remaining", "mean"),
    )
    .reset_index()
    .sort_values("total_timeouts", ascending=False)
)

print("\nTeam Timeout Usage:")
print(team_timeouts)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Replay review / challenge analysis, 2019-2023 seasons.
pbp <- load_pbp(2019:2023)

# Replay review analysis. The row-level flag is named `was_overturned` on
# purpose: summarize() evaluates its arguments in order, so writing
# `overturned = sum(overturned)` followed by `mean(overturned)` would take
# the mean of the freshly computed count, not of the per-play flag.
# NOTE(review): nflfastR appears to record a changed call as "reversed";
# "overturned" is still accepted in case the data uses that wording.
replays <- pbp %>%
  filter(!is.na(replay_or_challenge_result)) %>%
  mutate(
    was_overturned = replay_or_challenge_result %in% c("reversed", "overturned")
  )

# Overall success rate
success_rate <- replays %>%
  summarize(
    total_reviews = n(),
    overturned = sum(was_overturned),
    success_rate = mean(was_overturned) * 100
  )

# cat() is used for headers because print() shows "\n" as a literal escape.
cat("Overall Challenge Results:\n")
print(success_rate)

# By year
by_year <- replays %>%
  group_by(year = season) %>%
  summarize(
    reviews = n(),
    overturned = sum(was_overturned),
    rate = mean(was_overturned) * 100,
    .groups = "drop"
  )

cat("\nChallenge Success by Year:\n")
print(by_year)

# By play type challenged (minimum 10 reviews)
by_type <- replays %>%
  group_by(play_type) %>%
  summarize(
    reviews = n(),
    rate = mean(was_overturned) * 100,
    .groups = "drop"
  ) %>%
  filter(reviews >= 10) %>%
  arrange(desc(rate))

cat("\nSuccess Rate by Play Type:\n")
print(by_type)
import nfl_data_py as nfl
import pandas as pd
# Replay review / challenge analysis, 2019-2023 seasons.
pbp = nfl.import_pbp_data([2019, 2020, 2021, 2022, 2023])

# Replay reviews.
# NOTE(review): the nflverse data appears to record a changed call as
# "reversed"; "overturned" is still accepted in case the data uses that
# wording.
replays = pbp[pbp["replay_or_challenge_result"].notna()].copy()
replays["overturned"] = replays["replay_or_challenge_result"].isin(
    ["reversed", "overturned"]
)

# Overall success rate (guarded so an empty selection does not divide by 0)
total = len(replays)
overturned = int(replays["overturned"].sum())
overall_rate = overturned / total * 100 if total else 0.0
print(f"Overall Challenge Success: {overturned}/{total} ({overall_rate:.1f}%)")

# By year
by_year = (
    replays.groupby("season")
    .agg(
        reviews=("overturned", "count"),
        overturned=("overturned", "sum"),
        rate=("overturned", lambda x: x.mean() * 100),
    )
    .reset_index()
)

print("\nChallenge Success by Year:")
print(by_year)

# By play type (minimum 10 reviews)
by_type = (
    replays.groupby("play_type")
    .agg(
        reviews=("overturned", "count"),
        rate=("overturned", lambda x: x.mean() * 100),
    )
    .reset_index()
)
by_type = by_type[by_type["reviews"] >= 10].sort_values("rate", ascending=False)

print("\nSuccess Rate by Play Type:")
print(by_type)
nflfastR
tidyverse
nfl_data_py
pandas
library(nflfastR)
library(tidyverse)
# Penalty analysis for the 2023 season.
pbp <- load_pbp(2023)

# Penalty analysis
penalties <- pbp %>%
  filter(penalty == 1)

# Most common penalties
common_penalties <- penalties %>%
  filter(!is.na(penalty_type)) %>%
  count(penalty_type, sort = TRUE) %>%
  head(15)

# cat() is used for headers because print() shows "\n" as a literal escape.
cat("Most Common Penalties:\n")
print(common_penalties)

# Team penalty rates. Grouping is by the team in possession, so this only
# counts penalties a team committed while it had the ball; penalties it
# committed on defense are not included.
team_penalties <- pbp %>%
  filter(
    play_type %in% c("pass", "run", "no_play"),
    !is.na(posteam)
  ) %>%
  mutate(
    # coalesce() turns the NA cases (missing penalty flag or penalty team)
    # into FALSE so counts, rate and yards share one denominator.
    own_penalty = coalesce(penalty == 1 & penalty_team == posteam, FALSE)
  ) %>%
  group_by(posteam) %>%
  summarize(
    plays = n(),
    penalties = sum(own_penalty),
    penalty_rate = mean(own_penalty) * 100,
    penalty_yards = sum(penalty_yards[own_penalty], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(penalty_rate))

cat("\nTeam Penalty Rates:\n")
print(team_penalties)

# Penalty impact on EPA: pass/run plays with vs. without a penalty flag
penalty_impact <- pbp %>%
  filter(play_type %in% c("pass", "run")) %>%
  mutate(had_penalty = penalty == 1) %>%
  group_by(had_penalty) %>%
  summarize(
    plays = n(),
    avg_epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

cat("\nEPA Impact of Penalties:\n")
print(penalty_impact)
import nfl_data_py as nfl
import pandas as pd
# Penalty analysis for the 2023 season.
pbp = nfl.import_pbp_data([2023])

# Penalty analysis
penalties = pbp[pbp["penalty"] == 1]

# Most common
common = (
    penalties[penalties["penalty_type"].notna()]
    .groupby("penalty_type")
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
    .head(15)
)

print("Most Common Penalties:")
print(common)

# Team penalty rates. Grouping is by the team in possession, so this only
# counts penalties a team committed while it had the ball; penalties it
# committed on defense are not included.
plays = pbp[pbp["play_type"].isin(["pass", "run", "no_play"])].copy()
plays["own_penalty"] = (plays["penalty"] == 1) & (
    plays["penalty_team"] == plays["posteam"]
)
# Yards count only on the team's own penalties; every other row contributes 0.
plays["own_penalty_yards"] = plays["penalty_yards"].where(plays["own_penalty"], 0)

# Named aggregation replaces groupby.apply: it does not depend on
# `group.name` and keeps the count columns as integers.
team_penalties = (
    plays.groupby("posteam")
    .agg(
        plays=("own_penalty", "size"),
        penalties=("own_penalty", "sum"),
        penalty_rate=("own_penalty", lambda x: x.mean() * 100),
        penalty_yards=("own_penalty_yards", "sum"),
    )
    .reset_index()
    .sort_values("penalty_rate", ascending=False)
)

print("\nTeam Penalty Rates:")
print(team_penalties)

# EPA impact: pass/run plays only, matching the R version of this analysis
# ("no_play" rows would skew the comparison).
plays["had_penalty"] = plays["penalty"] == 1
scrimmage_plays = plays[plays["play_type"].isin(["pass", "run"])]
penalty_impact = (
    scrimmage_plays.groupby("had_penalty")
    # "size" counts every play; counting "epa" would skip missing values.
    .agg(plays=("epa", "size"), avg_epa=("epa", "mean"))
    .reset_index()
)

print("\nEPA Impact of Penalties:")
print(penalty_impact)
nflfastR
tidyverse
nfl_data_py
pandas
nflfastR - Play-by-play data with EPA
nflplotR - NFL team logos & plotting
tidyverse - Data manipulation & visualization
ggplot2 - Advanced visualizations
nfl_data_py - NFL data (nflverse compatible)
pandas - Data manipulation
matplotlib - Visualizations
scikit-learn - Machine learning
Learn the theory behind these techniques in our comprehensive tutorial series.
Browse Tutorials