NFL Analytics Code Examples - R & Python Scripts

Situational Analysis

Analyze decision-making in key game situations

Fourth Down Decision Analysis

Evaluate fourth down go-for-it decisions vs field goals/punts.

Intermediate

library(nflfastR)
library(tidyverse)

# Fourth down decision analysis ----
# Loads 2023 play-by-play data, labels each fourth-down play as a go-for-it
# attempt, field goal, or punt, tabulates the labels by field position, and
# reports how often go-for-it attempts were converted.
pbp <- load_pbp(2023)

# Fourth down decisions
# Play types other than pass/run/field_goal/punt are labelled "Other" and
# removed by the final filter.
fourth_downs <- pbp %>%
  filter(down == 4, !is.na(play_type)) %>%
  mutate(
    decision = case_when(
      play_type %in% c("pass", "run") ~ "Go for it",
      play_type == "field_goal" ~ "Field Goal",
      play_type == "punt" ~ "Punt",
      TRUE ~ "Other"
    )
  ) %>%
  filter(decision != "Other")

# Decision by field position
# case_when() takes the first matching threshold, so each band excludes the
# bands above it; a missing yardline_100 matches nothing and falls through
# to "Own territory".
decision_analysis <- fourth_downs %>%
  mutate(
    zone = case_when(
      yardline_100 <= 3 ~ "Goal line (1-3)",
      yardline_100 <= 10 ~ "Red zone (4-10)",
      yardline_100 <= 40 ~ "Opp territory",
      yardline_100 <= 60 ~ "Midfield",
      TRUE ~ "Own territory"
    )
  ) %>%
  count(zone, decision, name = "plays") %>%
  pivot_wider(names_from = decision, values_from = plays, values_fill = 0)

# cat() is used for headings because print() would display "\n" as a
# literal escape sequence instead of emitting a blank line.
cat("Fourth Down Decisions by Field Position:\n")
print(decision_analysis)

# Go-for-it success rate
# A touchdown only counts as a conversion when the offense scored it;
# `touchdown == 1` alone is also set on defensive return touchdowns.
# NOTE(review): assumes td_team holds the scoring team's abbreviation, as
# described in the nflfastR field descriptions.
go_for_it <- fourth_downs %>%
  filter(decision == "Go for it") %>%
  mutate(
    converted = first_down == 1 | (touchdown == 1 & td_team == posteam)
  ) %>%
  summarize(
    attempts = n(),
    conversions = sum(converted, na.rm = TRUE),
    success_rate = mean(converted, na.rm = TRUE) * 100
  )

cat("\nGo-for-it Success Rate:\n")
print(go_for_it)

import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Fourth down decisions
# Keep rows that are fourth downs and have a recorded play type; copy so
# later column assignments do not touch `pbp`.
is_fourth_down = pbp["down"].eq(4)
has_play_type = pbp["play_type"].notna()
fourth = pbp.loc[is_fourth_down & has_play_type].copy()

def get_decision(play_type):
    """Map a play type to the fourth-down decision it represents.

    Returns "Go for it" for "pass" or "run", "Field Goal" for "field_goal",
    "Punt" for "punt", and "Other" for anything else.
    """
    rules = (
        (("pass", "run"), "Go for it"),
        (("field_goal",), "Field Goal"),
        (("punt",), "Punt"),
    )
    for play_types, label in rules:
        if play_type in play_types:
            return label
    return "Other"

# Label every play with its decision, then drop the plays that are not a
# go-for-it attempt, field goal, or punt.
decision_labels = fourth["play_type"].apply(get_decision)
fourth["decision"] = decision_labels
fourth = fourth[fourth["decision"].ne("Other")]

# Decision by field position
def get_zone(yd):
    """Return the field-position band for a yardline_100 value.

    Bands are checked from the smallest threshold upwards and the first one
    that `yd` does not exceed wins. Values above 60, and values that compare
    false against every threshold (such as NaN), yield "Own territory".
    """
    bands = (
        (3, "Goal line (1-3)"),
        (10, "Red zone (4-10)"),
        (40, "Opp territory"),
        (60, "Midfield"),
    )
    for upper_bound, label in bands:
        if yd <= upper_bound:
            return label
    return "Own territory"

# Assign each play to a field-position band.
fourth["zone"] = fourth["yardline_100"].apply(get_zone)

# Count plays per (zone, decision) and spread the decisions into columns;
# combinations that never occurred are filled with 0.
decision_analysis = (fourth.groupby(["zone", "decision"])
    .size()
    .reset_index(name="plays")
    .pivot(index="zone", columns="decision", values="plays")
    .fillna(0))

print("Fourth Down Decisions by Field Position:")
print(decision_analysis)

# Go-for-it success rate
# A touchdown only counts as a conversion when the offense scored it;
# `touchdown == 1` alone is also set on defensive return touchdowns.
# NOTE(review): assumes td_team holds the scoring team's abbreviation, as
# described in the nflfastR field descriptions.
go_for_it = fourth[fourth["decision"] == "Go for it"]
gained_first_down = go_for_it["first_down"] == 1
offense_scored = (go_for_it["touchdown"] == 1) & (go_for_it["td_team"] == go_for_it["posteam"])
success_rate = (gained_first_down | offense_scored).mean() * 100
print(f"\nGo-for-it Success Rate: {success_rate:.1f}%")

Packages: nflfastR tidyverse nfl_data_py pandas

Two-Point Conversion Analysis

Analyze two-point conversion attempts and success rates.

Intermediate

library(nflfastR)
library(tidyverse)

# Two-point conversion analysis ----
# Loads 2019-2023 play-by-play data and reports two-point conversion
# success overall, by play type, and by team.
pbp <- load_pbp(2019:2023)

# Two-point conversion analysis
two_point <- pbp %>%
  filter(two_point_attempt == 1)

# Overall success rate
# na.rm = TRUE matches the by-type and by-team summaries below; without it
# a single missing result would turn both figures into NA.
overall <- two_point %>%
  summarize(
    attempts = n(),
    successes = sum(two_point_conv_result == "success", na.rm = TRUE),
    success_rate = mean(two_point_conv_result == "success", na.rm = TRUE) * 100
  )

# cat() is used for headings because print() would display "\n" as a
# literal escape sequence instead of emitting a blank line.
cat("Overall Two-Point Conversion Success:\n")
print(overall)

# By play type
by_type <- two_point %>%
  filter(play_type %in% c("pass", "run")) %>%
  group_by(play_type) %>%
  summarize(
    attempts = n(),
    successes = sum(two_point_conv_result == "success", na.rm = TRUE),
    success_rate = mean(two_point_conv_result == "success", na.rm = TRUE) * 100
  )

cat("\nBy Play Type:\n")
print(by_type)

# Team leaders
# Only teams with at least 5 attempts are ranked.
team_2pt <- two_point %>%
  group_by(posteam) %>%
  summarize(
    attempts = n(),
    successes = sum(two_point_conv_result == "success", na.rm = TRUE),
    success_rate = mean(two_point_conv_result == "success", na.rm = TRUE) * 100
  ) %>%
  filter(attempts >= 5) %>%
  arrange(desc(success_rate))

# The table was previously computed but never shown.
cat("\nTeam Leaders (min. 5 attempts):\n")
print(team_2pt)

import nfl_data_py as nfl
import pandas as pd

# Load the 2019-2023 play-by-play tables.
pbp = nfl.import_pbp_data([2019, 2020, 2021, 2022, 2023])

# Two-point conversions
two_point = pbp[pbp["two_point_attempt"] == 1].copy()

# Overall success rate
# A missing result compares unequal to "success", so it counts as a failed
# attempt in both the numerator and the denominator.
success = (two_point["two_point_conv_result"] == "success")
print(f"Overall 2PT Success Rate: {success.mean() * 100:.1f}% ({success.sum()}/{len(two_point)})")

# By play type
# "size" counts every attempt in the group. "count" would skip rows with a
# missing result, so `attempts` would disagree with the denominator used by
# `success_rate`.
by_type = (two_point[two_point["play_type"].isin(["pass", "run"])]
    .groupby("play_type")
    .agg(
        attempts=("two_point_conv_result", "size"),
        successes=("two_point_conv_result", lambda x: (x == "success").sum()),
        success_rate=("two_point_conv_result", lambda x: (x == "success").mean() * 100)
    )
    .reset_index())

print("\nBy Play Type:")
print(by_type)

Packages: nflfastR tidyverse nfl_data_py pandas

Clock Management Analysis

Evaluate late-game clock management decisions.

Advanced

library(nflfastR)
library(tidyverse)

# Late-game clock management analysis ----
# Loads 2023 play-by-play data and compares play selection for teams that
# are leading, trailing, or tied late in close games.
pbp <- load_pbp(2023)

# Late game clock management (4th quarter, close game)
# Only pass and run plays within one score (8 points) are kept.
late_game <- pbp %>%
  filter(
    qtr == 4,
    game_seconds_remaining <= 300,  # Last 5 minutes
    abs(score_differential) <= 8,
    play_type %in% c("pass", "run")
  )

# Play selection by score differential
late_plays <- late_game %>%
  mutate(
    situation = case_when(
      score_differential > 0 ~ "Leading",
      score_differential < 0 ~ "Trailing",
      TRUE ~ "Tied"
    )
  ) %>%
  group_by(situation) %>%
  summarize(
    plays = n(),
    pass_rate = mean(play_type == "pass") * 100,
    # NOTE(review): play_clock appears to be delivered as text in nflfastR
    # data; mean() on a character column returns NA with a warning, so it is
    # coerced first. as.numeric() is a no-op if it is already numeric.
    avg_play_clock = mean(as.numeric(play_clock), na.rm = TRUE),
    .groups = "drop"
  )

cat("Late Game Play Selection:\n")
print(late_plays)

import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Late game clock management
# Build one mask per condition: fourth quarter, at most 300 seconds left in
# the game, score within 8 points, and a pass or run play.
in_fourth_quarter = pbp["qtr"].eq(4)
in_final_stretch = pbp["game_seconds_remaining"].le(300)
within_one_score = pbp["score_differential"].abs().le(8)
is_pass_or_run = pbp["play_type"].isin(["pass", "run"])

late_game = pbp.loc[
    in_fourth_quarter & in_final_stretch & within_one_score & is_pass_or_run
].copy()

def get_situation(diff):
    """Describe a score differential from the offense's point of view.

    Returns "Leading" for a positive differential, "Trailing" for a negative
    one, and "Tied" otherwise (including values such as NaN that are neither
    greater nor less than zero).
    """
    if diff > 0:
        return "Leading"
    return "Trailing" if diff < 0 else "Tied"

# Tag each play with the offense's game situation.
late_game["situation"] = late_game["score_differential"].apply(get_situation)

# Summarise play selection per situation.
# "size" counts every play in the group, which is the same denominator the
# pass-rate mean uses; counting non-null `epa` values would silently drop
# plays whose EPA is missing.
late_plays = (late_game.groupby("situation")
    .agg(
        plays=("play_type", "size"),
        pass_rate=("play_type", lambda x: (x == "pass").mean() * 100)
    )
    .reset_index())

print("Late Game Play Selection:")
print(late_plays)

Packages: nflfastR tidyverse nfl_data_py pandas

Timeout Usage Analysis

Analyze when teams use timeouts and their strategic impact.

Intermediate

library(nflfastR)
library(tidyverse)

# Timeout usage analysis ----
# Loads 2023 play-by-play data, counts timeouts per quarter, and
# summarises each team's timeout usage.
pbp <- load_pbp(2023)

# Timeout usage by situation
# Any quarter above 2 (including overtime periods, if present) is labelled
# "Second Half"; a close game is one within 8 points.
timeouts <- pbp %>%
  filter(timeout == 1) %>%
  mutate(
    half = if_else(qtr <= 2, "First Half", "Second Half"),
    close_game = abs(score_differential) <= 8
  )

# Timeout distribution
timeout_dist <- timeouts %>%
  count(half, qtr, name = "timeouts")

# cat() is used for headings because print() would display "\n" as a
# literal escape sequence instead of emitting a blank line.
cat("Timeout Distribution by Quarter:\n")
print(timeout_dist)

# Team timeout usage patterns
team_timeouts <- timeouts %>%
  group_by(timeout_team) %>%
  summarize(
    total_timeouts = n(),
    first_half = sum(half == "First Half"),
    second_half = sum(half == "Second Half"),
    # na.rm keeps one missing score differential from turning the whole
    # team's count into NA.
    close_games = sum(close_game, na.rm = TRUE),
    avg_time_remaining = mean(game_seconds_remaining, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_timeouts))

cat("\nTeam Timeout Usage:\n")
print(team_timeouts)

import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Filter timeouts
# Keep only timeout rows, then tag each with its half (quarters 1-2 are the
# first half, everything else the second) and whether the score was within
# 8 points.
timeouts = pbp.loc[pbp["timeout"].eq(1)].copy()
timeouts["half"] = [
    "First Half" if quarter <= 2 else "Second Half"
    for quarter in timeouts["qtr"]
]
timeouts["close_game"] = timeouts["score_differential"].abs().le(8)

# Distribution by quarter
timeouts_per_quarter = timeouts.groupby(["half", "qtr"]).size()
timeout_dist = timeouts_per_quarter.reset_index(name="timeouts")

print("Timeout Distribution by Quarter:")
print(timeout_dist)

# Team patterns
# One row per team: total timeouts, the split by half, how many came in
# close games, and the mean game time remaining when they were called.
per_team = timeouts.groupby("timeout_team").agg(
    total_timeouts=("timeout", "count"),
    first_half=("half", lambda halves: halves.eq("First Half").sum()),
    second_half=("half", lambda halves: halves.eq("Second Half").sum()),
    close_games=("close_game", "sum"),
    avg_time_remaining=("game_seconds_remaining", "mean"),
)
team_timeouts = per_team.reset_index().sort_values("total_timeouts", ascending=False)

print("\nTeam Timeout Usage:")
print(team_timeouts)

Packages: nflfastR tidyverse nfl_data_py pandas

Challenge Success Rates

Analyze coach challenge success rates and tendencies.

Intermediate

library(nflfastR)
library(tidyverse)

# Replay review / challenge analysis ----
# Loads 2019-2023 play-by-play data and reports how often reviewed plays
# were overturned: overall, by season, and by play type.
pbp <- load_pbp(2019:2023)

# Replay review analysis
# The per-play flag is named `was_overturned` so that the summary column
# `overturned` (a count) cannot shadow it inside summarize(): once
# `overturned = sum(overturned)` is defined, a later `mean(overturned)`
# would read the count and report count * 100 instead of a percentage.
# NOTE(review): nflfastR is understood to record an overturned ruling as
# "reversed" (alongside values such as "upheld"); "overturned" never
# matched any row. Confirm against the field descriptions.
replays <- pbp %>%
  filter(!is.na(replay_or_challenge_result)) %>%
  mutate(was_overturned = replay_or_challenge_result == "reversed")

# Overall success rate
success_rate <- replays %>%
  summarize(
    total_reviews = n(),
    overturned = sum(was_overturned),
    success_rate = mean(was_overturned) * 100
  )

# cat() is used for headings because print() would display "\n" as a
# literal escape sequence instead of emitting a blank line.
cat("Overall Challenge Results:\n")
print(success_rate)

# By year
by_year <- replays %>%
  group_by(year = season) %>%
  summarize(
    reviews = n(),
    overturned = sum(was_overturned),
    rate = mean(was_overturned) * 100,
    .groups = "drop"
  )

cat("\nChallenge Success by Year:\n")
print(by_year)

# By play type challenged
# Play types with fewer than 10 reviews are left out of the ranking.
by_type <- replays %>%
  group_by(play_type) %>%
  summarize(
    reviews = n(),
    rate = mean(was_overturned) * 100,
    .groups = "drop"
  ) %>%
  filter(reviews >= 10) %>%
  arrange(desc(rate))

cat("\nSuccess Rate by Play Type:\n")
print(by_type)

import nfl_data_py as nfl
import pandas as pd

# Load the 2019-2023 play-by-play tables.
pbp = nfl.import_pbp_data([2019, 2020, 2021, 2022, 2023])

# Replay reviews
# Keep plays that have a recorded review result and flag the overturned ones.
# NOTE(review): nflfastR-derived data is understood to record an overturned
# ruling as "reversed" (alongside values such as "upheld"); "overturned"
# never matched any row. Confirm against the field descriptions.
replays = pbp[pbp["replay_or_challenge_result"].notna()].copy()
replays["overturned"] = replays["replay_or_challenge_result"] == "reversed"

# Overall success rate
total = len(replays)
overturned = replays["overturned"].sum()
print(f"Overall Challenge Success: {overturned}/{total} ({overturned/total*100:.1f}%)")

# By year
by_year = (replays.groupby("season")
    .agg(
        reviews=("overturned", "count"),
        overturned=("overturned", "sum"),
        rate=("overturned", lambda x: x.mean() * 100)
    )
    .reset_index())

print("\nChallenge Success by Year:")
print(by_year)

# By play type
by_type = (replays.groupby("play_type")
    .agg(
        reviews=("overturned", "count"),
        rate=("overturned", lambda x: x.mean() * 100)
    )
    .reset_index())

# Play types with fewer than 10 reviews are left out of the ranking.
by_type = by_type[by_type["reviews"] >= 10].sort_values("rate", ascending=False)
print("\nSuccess Rate by Play Type:")
print(by_type)

Packages: nflfastR tidyverse nfl_data_py pandas

Penalty Impact Analysis

Analyze penalty frequency, types, and their impact on games.

Intermediate

library(nflfastR)
library(tidyverse)

# Penalty impact analysis ----
# Loads 2023 play-by-play data and reports the most common penalty types,
# each offense's penalty rate, and average EPA with and without a penalty.
pbp <- load_pbp(2023)

# Penalty analysis
penalties <- pbp %>%
  filter(penalty == 1)

# Most common penalties
# count(sort = TRUE) orders by descending frequency; the top 15 are kept.
common_penalties <- penalties %>%
  filter(!is.na(penalty_type)) %>%
  count(penalty_type, sort = TRUE) %>%
  head(15)

# cat() is used for headings because print() would display "\n" as a
# literal escape sequence instead of emitting a blank line.
cat("Most Common Penalties:\n")
print(common_penalties)

# Team penalty rates
# `own_penalty` flags plays where the team in possession was penalised, so
# the table measures penalties against the offense only. On a play without
# a penalty the flag is FALSE even when penalty_team is missing.
team_penalties <- pbp %>%
  filter(play_type %in% c("pass", "run", "no_play")) %>%
  mutate(own_penalty = penalty == 1 & penalty_team == posteam) %>%
  group_by(posteam) %>%
  summarize(
    plays = n(),
    penalties = sum(own_penalty, na.rm = TRUE),
    penalty_rate = mean(own_penalty, na.rm = TRUE) * 100,
    penalty_yards = sum(penalty_yards[penalty_team == posteam], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(penalty_rate))

cat("\nTeam Penalty Rates:\n")
print(team_penalties)

# Penalty impact on EPA
# Compares mean EPA on pass/run plays with and without a penalty.
penalty_impact <- pbp %>%
  filter(play_type %in% c("pass", "run")) %>%
  mutate(had_penalty = penalty == 1) %>%
  group_by(had_penalty) %>%
  summarize(
    plays = n(),
    avg_epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

cat("\nEPA Impact of Penalties:\n")
print(penalty_impact)

import nfl_data_py as nfl
import pandas as pd

# Load the 2023 play-by-play table.
pbp = nfl.import_pbp_data([2023])

# Penalty analysis
penalties = pbp.loc[pbp["penalty"].eq(1)]

# Most common
# Count penalised plays per penalty type (rows without a type are skipped)
# and keep the 15 most frequent.
typed_penalties = penalties.dropna(subset=["penalty_type"])
type_counts = typed_penalties.groupby("penalty_type").size().reset_index(name="count")
common = type_counts.sort_values("count", ascending=False).head(15)

print("Most Common Penalties:")
print(common)

# Team penalty rates
# Restrict to pass, run, and no_play rows; copy so later column
# assignments do not touch `pbp`.
counted_play_types = ["pass", "run", "no_play"]
plays = pbp.loc[pbp["play_type"].isin(counted_play_types)].copy()

def calc_team_penalties(group):
    """Summarise penalties called against the team a group belongs to.

    `group` is one slice of a groupby; its `.name` is used as the team
    abbreviation. Returns a Series with the number of plays in the group,
    the number of plays penalised against that team, that number as a
    percentage of plays (0 for an empty group), and the summed penalty
    yards on those plays.
    """
    team = group.name
    flagged_on_team = (group["penalty"] == 1) & (group["penalty_team"] == team)

    play_count = len(group)
    flag_count = int(flagged_on_team.sum())
    rate = flag_count / play_count * 100 if play_count > 0 else 0
    yards = group.loc[flagged_on_team, "penalty_yards"].sum()

    return pd.Series({
        "plays": play_count,
        "penalties": flag_count,
        "penalty_rate": rate,
        "penalty_yards": yards,
    })

# One row per offense, ordered from the highest penalty rate down.
team_penalties = plays.groupby("posteam").apply(calc_team_penalties).reset_index()
team_penalties = team_penalties.sort_values("penalty_rate", ascending=False)

print("\nTeam Penalty Rates:")
print(team_penalties)

# EPA impact
# Compare pass/run plays only, matching the R version of this analysis;
# `plays` also holds no_play rows, which would otherwise be mixed into the
# comparison.
scrimmage_plays = plays[plays["play_type"].isin(["pass", "run"])].copy()
scrimmage_plays["had_penalty"] = scrimmage_plays["penalty"] == 1
penalty_impact = (scrimmage_plays.groupby("had_penalty")
    .agg(plays=("epa", "count"), avg_epa=("epa", "mean"))
    .reset_index())

print("\nEPA Impact of Penalties:")
print(penalty_impact)

Packages: nflfastR tidyverse nfl_data_py pandas

Code Examples

Situational Analysis

Fourth Down Decision Analysis

Two-Point Conversion Analysis

Clock Management Analysis

Timeout Usage Analysis

Challenge Success Rates

Penalty Impact Analysis

Quick Package Reference

R Packages

Python Packages

Ready to Dive Deeper?