Code Examples

Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.

122 Examples
R & Python Support: All examples include both R and Python versions. Click the tabs to switch between languages. Use the copy button to copy code to clipboard.

Betting Models

Build predictive models for spreads, totals, and player props

Simple Spread Prediction Model
Build a basic spread prediction model using team EPA ratings.
Advanced
library(nflfastR)
library(tidyverse)

# ---- Simple spread model: season EPA ratings vs. actual results ----
# Builds one power rating per team from 2023 play-by-play EPA, predicts a
# home-perspective spread for every completed game, and prints in-sample
# error metrics. The ratings are computed from the same games they are
# scored on, so this is a descriptive fit, not an out-of-sample backtest.

# Load data
pbp <- load_pbp(2023)
# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2023)

# Calculate team ratings
epa_plays <- pbp %>%
  filter(!is.na(epa))

offense <- epa_plays %>%
  group_by(posteam) %>%
  summarize(off_epa = mean(epa), .groups = "drop")

defense <- epa_plays %>%
  group_by(defteam) %>%
  summarize(def_epa = mean(epa), .groups = "drop")

team_ratings <- offense %>%
  left_join(defense, by = c("posteam" = "defteam")) %>%
  mutate(
    # def_epa is EPA allowed, so it is subtracted from offensive EPA.
    net_epa = off_epa - def_epa,
    # NOTE(review): net_epa is a per-play mean; confirm 3.5 is the intended
    # per-play-to-points multiplier.
    power_rating = net_epa * 3.5  # Convert to points
  )

# Create matchup predictions
# Only the rating column is joined, once keyed for the home side and once
# for the away side, so no rename step is needed after each join.
home_ratings <- team_ratings %>%
  select(home_team = posteam, home_power = power_rating)
away_ratings <- team_ratings %>%
  select(away_team = posteam, away_power = power_rating)

games <- schedules %>%
  filter(!is.na(result)) %>%
  left_join(home_ratings, by = "home_team") %>%
  left_join(away_ratings, by = "away_team") %>%
  mutate(
    # Home-perspective spread: negative means the home team is favored.
    pred_spread = away_power - home_power - 2.5,  # HFA
    actual_spread = -result,
    error = pred_spread - actual_spread,
    # TRUE when the predicted favorite won outright; ties and exact-zero
    # predictions count as misses.
    correct_side = (pred_spread > 0 & result < 0) |
                   (pred_spread < 0 & result > 0)
  )

# Model performance
cat("Mean Absolute Error:", mean(abs(games$error)), "\n")
cat("Correct Side %:", mean(games$correct_side) * 100, "%\n")
import nfl_data_py as nfl
import pandas as pd
import numpy as np

# Load 2023 play-by-play and the schedule
pbp = nfl.import_pbp_data([2023])
schedules = nfl.import_schedules([2023])

# Team ratings: mean EPA on offense minus mean EPA allowed on defense
epa_plays = pbp.loc[pbp["epa"].notna()]


def _mean_epa_by(team_col, value_name):
    """Return mean EPA of ``epa_plays`` per ``team_col`` as columns (team, value_name)."""
    per_team = epa_plays.groupby(team_col)["epa"].mean().reset_index()
    per_team.columns = ["team", value_name]
    return per_team


team_ratings = _mean_epa_by("posteam", "off_epa").merge(
    _mean_epa_by("defteam", "def_epa"), on="team"
)
team_ratings = team_ratings.assign(
    net_epa=lambda df: df["off_epa"] - df["def_epa"],
    power_rating=lambda df: df["net_epa"] * 3.5,
)

# Attach the home and away power rating to every completed game
power = team_ratings[["team", "power_rating"]]
games = schedules[schedules["result"].notna()].copy()
for side in ("home", "away"):
    games = games.merge(
        power, left_on=f"{side}_team", right_on="team"
    ).rename(columns={"power_rating": f"{side}_power"})

# Home-perspective spread; 2.5 is the home-field adjustment
games["pred_spread"] = games["away_power"] - games["home_power"] - 2.5
games["actual_spread"] = -games["result"]
games["error"] = games["pred_spread"] - games["actual_spread"]

# The pick is correct when the predicted favorite won the game outright
picked_away_and_won = (games["pred_spread"] > 0) & (games["result"] < 0)
picked_home_and_won = (games["pred_spread"] < 0) & (games["result"] > 0)
games["correct_side"] = picked_away_and_won | picked_home_and_won

mae = games["error"].abs().mean()
hit_rate = games["correct_side"].mean() * 100
print(f"Mean Absolute Error: {mae:.2f}")
print(f"Correct Side %: {hit_rate:.1f}%")
Packages: nflfastR tidyverse nfl_data_py pandas numpy
Over/Under Totals Model
Predict game totals using pace and efficiency metrics.
Advanced
library(nflfastR)
library(tidyverse)

# ---- Over/under model: average scoring scaled by pace ----
# Approximates each team's points per game from touchdowns and made field
# goals, scales the combined average by a pace factor, and compares the
# prediction with the betting total for every completed 2023 game.

pbp <- load_pbp(2023)
# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2023)

# Calculate team scoring metrics
team_scoring <- pbp %>%
  filter(!is.na(epa)) %>%
  group_by(game_id, posteam) %>%
  summarize(
    plays = n(),
    # NOTE(review): touchdown is summed per possession team; presumably this
    # also counts return touchdowns scored by the defense - verify.
    touchdowns = sum(touchdown, na.rm = TRUE),
    # Count the made kicks first, then convert to points. The original
    # `field_goal_result == "made" * 3` evaluated `"made" * 3` first and
    # failed with a non-numeric argument error.
    field_goals = sum(field_goal_result == "made", na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(points_scored = touchdowns * 7 + field_goals * 3) %>%
  group_by(posteam) %>%
  summarize(
    avg_plays = mean(plays),
    avg_points = mean(points_scored),
    pace = avg_plays / 60,  # plays per minute proxy
    .groups = "drop"
  )

# Create totals predictions
home_scoring <- team_scoring %>%
  select(home_team = posteam, home_points = avg_points, home_pace = pace)
away_scoring <- team_scoring %>%
  select(away_team = posteam, away_points = avg_points, away_pace = pace)

games <- schedules %>%
  # `total_line` is the betting total. The schedule's `total` column is the
  # actual combined score, so comparing against it made "actual over"
  # always FALSE.
  filter(!is.na(result), !is.na(total_line)) %>%
  left_join(home_scoring, by = "home_team") %>%
  left_join(away_scoring, by = "away_team") %>%
  mutate(
    # Combined scoring average scaled by the two teams' mean pace factor.
    pred_total = (home_points + away_points) *
                 ((home_pace + away_pace) / 2),
    actual_total = home_score + away_score,
    # A game landing exactly on the line is labelled "Under".
    over_under = if_else(actual_total > total_line, "Over", "Under"),
    pred_over = pred_total > total_line,
    actual_over = actual_total > total_line,
    correct = pred_over == actual_over
  )

# Model performance
cat("Correct %:", mean(games$correct) * 100, "%\n")
cat("MAE:", mean(abs(games$pred_total - games$actual_total)), "\n")
import nfl_data_py as nfl
import pandas as pd
import numpy as np

# ---- Over/under model: average scoring scaled by pace ----
# Approximates each team's points per game from touchdowns and made field
# goals, scales the combined average by a pace factor, and compares the
# prediction with the betting total for every completed 2023 game.

pbp = nfl.import_pbp_data([2023])
schedules = nfl.import_schedules([2023])

# Calculate team scoring metrics
plays = pbp[pbp["epa"].notna()]

game_scoring = (
    plays.groupby(["game_id", "posteam"])
    .agg(
        plays=("epa", "count"),
        # NOTE(review): summed per possession team; presumably also counts
        # return touchdowns scored by the defense - verify.
        touchdowns=("touchdown", "sum"),
        # Made field goals are included so the points proxy matches the R version.
        field_goals=("field_goal_result", lambda s: (s == "made").sum()),
    )
    .reset_index()
)
game_scoring["points_approx"] = (
    game_scoring["touchdowns"] * 7 + game_scoring["field_goals"] * 3
)

team_scoring = (
    game_scoring.groupby("posteam")
    .agg(
        avg_plays=("plays", "mean"),
        avg_points=("points_approx", "mean"),
    )
    .reset_index()
)
team_scoring["pace"] = team_scoring["avg_plays"] / 60  # plays per minute proxy

# Create totals predictions
# `total_line` is the betting total. The schedule's `total` column is the
# actual combined score, so comparing against it made "actual over" always False.
games = schedules[
    schedules["result"].notna() & schedules["total_line"].notna()
].copy()

for side in ("home", "away"):
    # Rename before merging so no leftover `posteam` columns pile up.
    side_scoring = team_scoring.rename(
        columns={
            "posteam": f"{side}_team",
            "avg_points": f"{side}_points",
            "pace": f"{side}_pace",
        }
    )[[f"{side}_team", f"{side}_points", f"{side}_pace"]]
    games = games.merge(side_scoring, on=f"{side}_team")

# Combined scoring average scaled by the two teams' mean pace factor
games["pred_total"] = (games["home_points"] + games["away_points"]) * (
    (games["home_pace"] + games["away_pace"]) / 2
)
games["actual_total"] = games["home_score"] + games["away_score"]
games["correct"] = (games["pred_total"] > games["total_line"]) == (
    games["actual_total"] > games["total_line"]
)

print(f"Correct %: {games['correct'].mean() * 100:.1f}%")
print(f"MAE: {(games['pred_total'] - games['actual_total']).abs().mean():.1f}")
Packages: nflfastR tidyverse nfl_data_py pandas numpy
Player Prop Projections
Project player receiving yards using target share and matchup data.
Advanced
library(nflfastR)
library(tidyverse)

# ---- Receiving-yards projection from target share ----
# Season totals per receiver (minimum 50 targets) are turned into a
# per-game yardage projection by assuming a fixed number of team passes.

pbp <- load_pbp(2023)

# Pass plays that name a targeted receiver; shared by both summaries below
targeted_passes <- pbp %>%
  filter(play_type == "pass", !is.na(receiver_player_id))

# Calculate receiver baseline stats
receiver_stats <- targeted_passes %>%
  group_by(receiver_player_id, receiver_player_name, posteam) %>%
  summarize(
    targets = n(),
    receptions = sum(complete_pass),
    yards = sum(yards_gained, na.rm = TRUE),
    tds = sum(touchdown),
    avg_depth = mean(air_yards, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(targets >= 50) %>%
  mutate(
    catch_rate = receptions / targets,
    yards_per_target = yards / targets,
    yards_per_reception = yards / receptions
  )

# Calculate team target rates for each receiver
team_targets <- targeted_passes %>%
  group_by(posteam) %>%
  summarize(team_targets = n())

# Project yards for next game
# Assume team throws ~35 passes per game
passes_per_game <- 35

receiver_share <- receiver_stats %>%
  left_join(team_targets, by = "posteam") %>%
  mutate(
    target_share = targets / team_targets,
    proj_targets = target_share * passes_per_game,
    proj_yards = proj_targets * yards_per_target
  ) %>%
  arrange(desc(proj_yards))

# Show the 20 largest projections
receiver_share %>%
  select(receiver_player_name, target_share,
         yards_per_target, proj_yards) %>%
  head(20) %>%
  print()
import nfl_data_py as nfl
import pandas as pd

# ---- Receiving-yards projection from target share ----
# Season totals per receiver (minimum 50 targets) are turned into a
# per-game yardage projection by assuming a fixed number of team passes.

pbp = nfl.import_pbp_data([2023])

# Pass plays that name a targeted receiver
has_receiver = pbp["receiver_player_id"].notna()
is_pass = pbp["play_type"] == "pass"
pass_plays = pbp[has_receiver & is_pass]

# Calculate receiver baseline stats
receiver_keys = ["receiver_player_id", "receiver_player_name", "posteam"]
receiver_stats = pass_plays.groupby(receiver_keys).agg(
    targets=("play_id", "count"),
    receptions=("complete_pass", "sum"),
    yards=("yards_gained", "sum"),
    tds=("touchdown", "sum"),
    avg_depth=("air_yards", "mean"),
).reset_index()

# Keep receivers with at least 50 targets, then add the rate stats
receiver_stats = receiver_stats[receiver_stats["targets"] >= 50]
receiver_stats["catch_rate"] = receiver_stats["receptions"] / receiver_stats["targets"]
receiver_stats["yards_per_target"] = receiver_stats["yards"] / receiver_stats["targets"]

# Calculate team target rates
team_targets = pass_plays.groupby("posteam").size().reset_index(name="team_targets")

receiver_share = receiver_stats.merge(team_targets, on="posteam")
receiver_share["target_share"] = receiver_share["targets"] / receiver_share["team_targets"]

# Project yards (35 passes per game assumption)
PASSES_PER_GAME = 35
receiver_share["proj_targets"] = receiver_share["target_share"] * PASSES_PER_GAME
receiver_share["proj_yards"] = receiver_share["proj_targets"] * receiver_share["yards_per_target"]

# Show the 20 largest projections
report_columns = ["receiver_player_name", "target_share", "yards_per_target", "proj_yards"]
result = receiver_share.nlargest(20, "proj_yards")[report_columns]
print(result)
Packages: nflfastR tidyverse nfl_data_py pandas
Elo Rating System
Build and track Elo ratings for all NFL teams.
Advanced
library(nflfastR)
library(tidyverse)

# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2020:2023)

# Initialize Elo ratings
# Every team code seen as a home or away team starts at 1500. The vector is
# sized from the data, because a hard-coded 32 fails (or yields NA names) as
# soon as the chosen seasons contain a different number of team codes.
teams <- unique(c(schedules$home_team, schedules$away_team))
elo <- setNames(rep(1500, length(teams)), teams)
k_factor <- 20  # base rating change per game, before the margin multiplier

# Expected score (win probability) of a side rated `r1` against a side
# rated `r2`, using the logistic Elo curve with a 400-point scale.
expected <- function(r1, r2) {
  rating_gap <- r2 - r1
  1 / (1 + 10^(rating_gap / 400))
}

# Rating points the winner gains (and the loser gives up) for one game.
#   winner_rating, loser_rating: pre-game ratings, as passed by the caller
#   margin: point margin; only its absolute value is used
# The change is k_factor times a margin-of-victory multiplier times how
# unexpected the win was (1 minus the winner's expected score).
update_elo <- function(winner_rating, loser_rating, margin) {
  rating_gap <- winner_rating - loser_rating
  surprise <- 1 - expected(winner_rating, loser_rating)
  # Grows with log(margin + 1) and shrinks as the winner's rating edge grows.
  mov_mult <- log(abs(margin) + 1) * 2.2 / (rating_gap * 0.001 + 2.2)
  k_factor * mov_mult * surprise
}

# Process games
# Each game's update depends on the ratings left by earlier games, so this
# has to run sequentially.
# NOTE(review): ordering relies on game_id sorting chronologically - verify.
games <- schedules %>%
  filter(!is.na(result)) %>%
  arrange(game_id)

for (i in seq_len(nrow(games))) {
  # Index the columns directly instead of slicing a one-row tibble each pass.
  home <- games$home_team[[i]]
  away <- games$away_team[[i]]
  margin <- games$result[[i]]

  home_elo <- elo[[home]] + 55  # HFA
  away_elo <- elo[[away]]

  if (margin > 0) {  # Home win
    winner <- home
    loser <- away
    change <- update_elo(home_elo, away_elo, margin)
  } else {  # Away win; a tie lands here with margin 0, which yields change 0
    winner <- away
    loser <- home
    change <- update_elo(away_elo, home_elo, -margin)
  }

  # Zero-sum transfer. The HFA bonus only affects the size of the change
  # and is never stored in the ratings themselves.
  elo[[winner]] <- elo[[winner]] + change
  elo[[loser]] <- elo[[loser]] - change
}

# Current Elo rankings
elo_df <- data.frame(team = names(elo), elo = unname(elo), row.names = NULL) %>%
  arrange(desc(elo))
print(elo_df)
import nfl_data_py as nfl
import pandas as pd
import numpy as np

# Four seasons of schedules, 2020 through 2023
schedules = nfl.import_schedules(list(range(2020, 2024)))

# Initialize Elo ratings: every team seen at home or away starts at 1500
teams = pd.concat(
    [schedules[side] for side in ("home_team", "away_team")]
).unique()
elo = dict.fromkeys(teams, 1500)
k_factor = 20  # base rating change per game, before the margin multiplier

def expected(r1, r2):
    """Return the expected score of a side rated ``r1`` against one rated ``r2``.

    Uses the logistic Elo curve with a 400-point scale; equal ratings give 0.5.
    """
    rating_gap = r2 - r1
    return 1 / (1 + 10 ** (rating_gap / 400))

def update_elo(winner_rating, loser_rating, margin):
    """Return the rating points the winner gains and the loser gives up.

    Args:
        winner_rating: Winner's pre-game rating, as passed by the caller.
        loser_rating: Loser's pre-game rating, as passed by the caller.
        margin: Point margin; only its absolute value is used.

    The change is ``k_factor`` times a margin-of-victory multiplier times how
    unexpected the win was (1 minus the winner's expected score).
    """
    rating_gap = winner_rating - loser_rating
    surprise = 1 - expected(winner_rating, loser_rating)
    # Grows with log(margin + 1) and shrinks as the winner's rating edge grows.
    mov_mult = np.log(abs(margin) + 1) * 2.2 / (rating_gap * 0.001 + 2.2)
    return k_factor * mov_mult * surprise

# Process games in game_id order; each update depends on the ratings left
# by the games before it
games = schedules.loc[schedules["result"].notna()].sort_values("game_id")

for home, away, result in zip(games["home_team"], games["away_team"], games["result"]):
    home_elo = elo[home] + 55  # Home field advantage
    away_elo = elo[away]

    if result > 0:  # Home win
        winner, loser = home, away
        change = update_elo(home_elo, away_elo, result)
    else:  # Away win
        winner, loser = away, home
        change = update_elo(away_elo, home_elo, -result)

    # Zero-sum transfer; the home bonus is never stored in the ratings
    elo[winner] += change
    elo[loser] -= change

# Current Elo rankings
elo_df = (
    pd.DataFrame(list(elo.items()), columns=["team", "elo"])
    .sort_values("elo", ascending=False)
    .reset_index(drop=True)
)
print("Current Elo Rankings:")
print(elo_df)
Packages: nflfastR tidyverse nfl_data_py pandas numpy
Power Rankings Model
Create composite power rankings using multiple metrics.
Intermediate
library(nflfastR)
library(tidyverse)

# ---- Composite power rankings: offense, defense, and record ----
# Scales offensive and defensive EPA per play to 0-100 across the league,
# then blends them with win percentage into a single rating.

pbp <- load_pbp(2023)
# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2023)

# Min-max scale a numeric vector onto 0-100 (NaN if all values are equal).
rescale_0_100 <- function(x) {
  (x - min(x)) / (max(x) - min(x)) * 100
}

# Calculate multiple components
# Run and pass plays with an EPA value, shared by the offense and defense views.
scrimmage_plays <- pbp %>%
  filter(!is.na(epa), play_type %in% c("pass", "run"))

offense_metrics <- scrimmage_plays %>%
  group_by(posteam) %>%
  summarize(
    off_epa = mean(epa),
    success_rate = mean(success) * 100,
    .groups = "drop"
  )

defense_metrics <- scrimmage_plays %>%
  group_by(defteam) %>%
  summarize(
    def_epa = mean(epa),
    def_success_allowed = mean(success) * 100,
    .groups = "drop"
  )

team_metrics <- offense_metrics %>%
  left_join(defense_metrics, by = c("posteam" = "defteam"))

# Get win-loss record
# One row per team per game; a tie counts as a non-win for both sides.
# NOTE(review): no game_type filter is applied, so any postseason games in
# the schedule are included in the record - confirm that is intended.
records <- schedules %>%
  filter(!is.na(result)) %>%
  pivot_longer(
    c(home_team, away_team),
    names_to = "location",
    values_to = "team"
  ) %>%
  mutate(
    win = (location == "home_team" & result > 0) |
          (location == "away_team" & result < 0)
  ) %>%
  group_by(team) %>%
  summarize(
    wins = sum(win),
    games = n(),
    win_pct = wins / games,
    .groups = "drop"
  )

# Combine into power ranking
power_rankings <- team_metrics %>%
  left_join(records, by = c("posteam" = "team")) %>%
  mutate(
    # Normalize each metric to 0-100
    off_score = rescale_0_100(off_epa),
    # Lower EPA allowed is better, so the defensive scale is flipped.
    def_score = 100 - rescale_0_100(def_epa),
    win_score = win_pct * 100,

    # Composite (40% offense, 40% defense, 20% record)
    power_rating = off_score * 0.4 + def_score * 0.4 + win_score * 0.2
  ) %>%
  arrange(desc(power_rating)) %>%
  mutate(rank = row_number())

power_rankings %>%
  select(posteam, rank, power_rating, off_epa, def_epa, win_pct) %>%
  print()
import nfl_data_py as nfl
import pandas as pd
import numpy as np

# ---- Composite power rankings: team metrics and win-loss records ----

pbp = nfl.import_pbp_data([2023])
schedules = nfl.import_schedules([2023])

# Calculate team metrics from run and pass plays that have an EPA value
plays = pbp[(pbp["epa"].notna()) & (pbp["play_type"].isin(["pass", "run"]))]

off_stats = plays.groupby("posteam").agg(
    off_epa=("epa", "mean"),
    success_rate=("success", lambda x: x.mean() * 100),
).reset_index()

def_stats = plays.groupby("defteam").agg(
    def_epa=("epa", "mean"),
).reset_index()

team_metrics = off_stats.merge(def_stats, left_on="posteam", right_on="defteam")

# Get win-loss records
# NOTE(review): no game_type filter is applied, so any postseason games in
# the schedule are included in the record - confirm that is intended.
games = schedules[schedules["result"].notna()]

# fill_value=0 keeps teams that appear on only one side (home or away),
# instead of turning their totals into NaN through index alignment.
total_games = games["home_team"].value_counts().add(
    games["away_team"].value_counts(), fill_value=0
)
home_wins = games.loc[games["result"] > 0, "home_team"].value_counts()
away_wins = games.loc[games["result"] < 0, "away_team"].value_counts()

# Reindex so a winless team gets 0 wins (win_pct 0.0) rather than a NaN
# that would propagate into its power rating.
total_wins = home_wins.add(away_wins, fill_value=0).reindex(
    total_games.index, fill_value=0
)

win_pct = (total_wins / total_games).rename_axis("team").reset_index(name="win_pct")

# Combine
power_rankings = team_metrics.merge(win_pct, left_on="posteam", right_on="team")

# Normalize
def normalize(col):
    """Min-max scale ``col`` so its smallest value maps to 0 and its largest to 100."""
    low = col.min()
    span = col.max() - low
    return (col - low) / span * 100

# Component scores on a 0-100 scale; defense is flipped because lower EPA
# allowed is better
power_rankings = power_rankings.assign(
    off_score=normalize(power_rankings["off_epa"]),
    def_score=100 - normalize(power_rankings["def_epa"]),
    win_score=power_rankings["win_pct"] * 100,
)

# Composite: 40% offense, 40% defense, 20% record
component_weights = {"off_score": 0.4, "def_score": 0.4, "win_score": 0.2}
power_rankings["power_rating"] = sum(
    power_rankings[component] * weight
    for component, weight in component_weights.items()
)

# Best rating first; rank is the 1-based position after sorting
power_rankings = power_rankings.sort_values(
    "power_rating", ascending=False
).reset_index(drop=True)
power_rankings["rank"] = power_rankings.index + 1

report_columns = ["posteam", "rank", "power_rating", "off_epa", "def_epa", "win_pct"]
print(power_rankings[report_columns])
Packages: nflfastR tidyverse nfl_data_py pandas numpy
Home Field Advantage Analysis
Quantify home field advantage across the NFL.
Intermediate
library(nflfastR)
library(tidyverse)

# ---- Home field advantage: overall, by team, and by season ----
# `result` is the home-perspective margin, so result > 0 is a home win.

# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2019:2023)

# Completed games only; reused by all three summaries below.
completed <- schedules %>%
  filter(!is.na(result))

# Overall HFA
hfa_overall <- completed %>%
  summarize(
    games = n(),
    home_wins = sum(result > 0),
    ties = sum(result == 0),
    away_wins = sum(result < 0),
    home_win_pct = mean(result > 0) * 100,
    avg_home_margin = mean(result)
  )

# cat() is used for the headings because print() on a string shows the
# quotes and a literal "\n" instead of starting a new line.
cat("Overall Home Field Advantage:\n")
print(hfa_overall)

# HFA by team
team_hfa <- completed %>%
  group_by(home_team) %>%
  summarize(
    home_games = n(),
    home_wins = sum(result > 0),
    home_win_pct = mean(result > 0) * 100,
    avg_margin = mean(result),
    .groups = "drop"
  ) %>%
  arrange(desc(home_win_pct))

cat("\nHFA by Stadium:\n")
print(team_hfa)

# HFA by season (trend over time)
hfa_by_season <- completed %>%
  group_by(season) %>%
  summarize(
    home_win_pct = mean(result > 0) * 100,
    avg_margin = mean(result),
    .groups = "drop"
  )

cat("\nHFA Trend by Season:\n")
print(hfa_by_season)
import nfl_data_py as nfl
import pandas as pd

# ---- Home field advantage: overall, by team, and by season ----
# `result` is the home-perspective margin, so result > 0 is a home win.

schedules = nfl.import_schedules(list(range(2019, 2024)))


def _home_win_count(margins):
    """Number of games in ``margins`` won by the home team."""
    return (margins > 0).sum()


def _home_win_pct(margins):
    """Share of games in ``margins`` won by the home team, in percent."""
    return (margins > 0).mean() * 100


# Overall HFA
games = schedules[schedules["result"].notna()]
margins = games["result"]

hfa_overall = {
    "games": len(games),
    "home_wins": (margins > 0).sum(),
    "away_wins": (margins < 0).sum(),
    "home_win_pct": (margins > 0).mean() * 100,
    "avg_home_margin": margins.mean(),
}

print("Overall Home Field Advantage:")
print(pd.DataFrame([hfa_overall]))

# HFA by team, best home win rate first
team_hfa = games.groupby("home_team").agg(
    home_games=("result", "count"),
    home_wins=("result", _home_win_count),
    home_win_pct=("result", _home_win_pct),
    avg_margin=("result", "mean"),
)
team_hfa = team_hfa.reset_index().sort_values("home_win_pct", ascending=False)

print("\nHFA by Stadium:")
print(team_hfa)

# HFA by season
hfa_by_season = games.groupby("season").agg(
    home_win_pct=("result", _home_win_pct),
    avg_margin=("result", "mean"),
).reset_index()

print("\nHFA Trend by Season:")
print(hfa_by_season)
Packages: nflfastR tidyverse nfl_data_py pandas
Key Number Analysis
Analyze final score margins and key betting numbers.
Intermediate
library(nflfastR)
library(tidyverse)

# ---- Key numbers: how often games land on each final margin ----

# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2018:2023)

# Completed games with the absolute final margin; reused by both summaries.
completed <- schedules %>%
  filter(!is.na(result)) %>%
  mutate(margin = abs(result))

# Calculate margin distribution
margins <- completed %>%
  group_by(margin) %>%
  summarize(games = n(), .groups = "drop") %>%
  mutate(pct = games / sum(games) * 100) %>%
  arrange(desc(games))

# Top key numbers
key_numbers <- margins %>%
  head(15) %>%
  mutate(cumulative_pct = cumsum(pct))

# cat() is used for the headings because print() on a string shows the
# quotes and a literal "\n" instead of starting a new line.
cat("Top 15 Final Margins:\n")
print(key_numbers)

# Games landing on 3 and 7
# Each statistic is computed straight from `margin`. The original first
# replaced the per-game on_3 / on_7 flags with their sums inside
# summarize(), so the following mean(on_3) * 100 returned the count times
# 100 rather than a percentage.
on_3_or_7 <- completed %>%
  summarize(
    total_games = n(),
    on_3 = sum(margin == 3),
    on_7 = sum(margin == 7),
    on_3_pct = mean(margin == 3) * 100,
    on_7_pct = mean(margin == 7) * 100,
    any_key_pct = mean(margin %in% c(3, 7, 10, 14)) * 100
  )

cat("\nKey Number Summary:\n")
print(on_3_or_7)
import nfl_data_py as nfl
import pandas as pd

# ---- Key numbers: how often games land on each final margin ----

schedules = nfl.import_schedules(list(range(2018, 2024)))

# Completed games with the absolute final margin
games = schedules.loc[schedules["result"].notna()].copy()
games["margin"] = games["result"].abs()

# Margin distribution, most frequent first
margin_counts = games.groupby("margin").size().reset_index(name="games")
margins = margin_counts.sort_values("games", ascending=False)

share = margins["games"] / margins["games"].sum() * 100
margins["pct"] = share
margins["cumulative_pct"] = share.cumsum()

print("Top 15 Final Margins:")
print(margins.head(15))

# Key number analysis: flag games that finished on 3, 7, or any key number
games = games.assign(
    on_3=games["margin"] == 3,
    on_7=games["margin"] == 7,
    on_key=games["margin"].isin([3, 7, 10, 14]),
)

key_summary = {"total_games": len(games)}
for flag in ("on_3", "on_7"):
    key_summary[flag] = games[flag].sum()
for flag in ("on_3", "on_7"):
    key_summary[f"{flag}_pct"] = games[flag].mean() * 100
key_summary["any_key_pct"] = games["on_key"].mean() * 100

print("\nKey Number Summary:")
print(pd.DataFrame([key_summary]))
Packages: nflfastR tidyverse nfl_data_py pandas
Closing Line Value Analysis
Track line movement and identify CLV opportunities.
Advanced
library(nflfastR)
library(tidyverse)

# ---- Against-the-spread (ATS) results by spread size and favorite ----
# Sign convention of the nflverse schedule data used below:
#   result      = home score minus away score
#   spread_line = points the home team is favored by (positive = home
#                 favorite, negative = away favorite)
# The home team therefore covers when result - spread_line > 0. The original
# code added the two and labelled negative lines as home favorites, which
# inverted both the cover flag and the favorite.

# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2023)

# Analyze spread movement patterns
spread_analysis <- schedules %>%
  filter(!is.na(result), !is.na(spread_line)) %>%
  mutate(
    # Home margin relative to the line; exactly 0 is a push.
    ats_margin = result - spread_line,
    # Pushes are counted as non-covers for the home side.
    home_covered = ats_margin > 0,
    # A pick'em line (0) is labelled "Away", as before.
    favorite = if_else(spread_line > 0, "Home", "Away"),
    spread_bucket = case_when(
      abs(spread_line) <= 3 ~ "Pick/FG",
      abs(spread_line) <= 7 ~ "4-7 pts",
      abs(spread_line) <= 10 ~ "8-10 pts",
      TRUE ~ "10+ pts"
    )
  )

# ATS performance by spread size
ats_by_spread <- spread_analysis %>%
  group_by(spread_bucket) %>%
  summarize(
    games = n(),
    home_cover_pct = mean(home_covered) * 100,
    avg_margin = mean(ats_margin),
    .groups = "drop"
  )

# cat() is used for the headings because print() on a string shows the
# quotes and a literal "\n" instead of starting a new line.
cat("ATS Performance by Spread Size:\n")
print(ats_by_spread)

# Favorite vs underdog ATS
fav_vs_dog <- spread_analysis %>%
  mutate(
    favorite_covered = (favorite == "Home" & home_covered) |
                       (favorite == "Away" & !home_covered)
  ) %>%
  summarize(
    games = n(),
    favorite_cover_pct = mean(favorite_covered) * 100,
    dog_cover_pct = (1 - mean(favorite_covered)) * 100
  )

cat("\nFavorite vs Underdog ATS:\n")
print(fav_vs_dog)
import nfl_data_py as nfl
import pandas as pd
import numpy as np

# ---- Against-the-spread (ATS) results by spread size and favorite ----
# Sign convention of the nflverse schedule data used below:
#   result      = home score minus away score
#   spread_line = points the home team is favored by (positive = home
#                 favorite, negative = away favorite)
# The home team therefore covers when result - spread_line > 0. The original
# code added the two and labelled negative lines as home favorites, which
# inverted both the cover flag and the favorite.

schedules = nfl.import_schedules([2023])

# Analyze spread patterns
games = schedules[
    schedules["result"].notna() & schedules["spread_line"].notna()
].copy()

# Home margin relative to the line; exactly 0 is a push and is counted as
# a non-cover for the home side.
games["ats_margin"] = games["result"] - games["spread_line"]
games["home_covered"] = games["ats_margin"] > 0
# A pick'em line (0) is labelled "Away", as before.
games["favorite"] = np.where(games["spread_line"] > 0, "Home", "Away")


def spread_bucket(s):
    """Return the size bucket label for spread ``s``, ignoring its sign."""
    s = abs(s)
    if s <= 3:
        return "Pick/FG"
    elif s <= 7:
        return "4-7 pts"
    elif s <= 10:
        return "8-10 pts"
    else:
        return "10+ pts"


games["spread_bucket"] = games["spread_line"].apply(spread_bucket)

# ATS performance by spread size
# avg_margin averages the precomputed ats_margin column instead of looking
# up spread_line through the group's index inside a lambda.
ats_by_spread = (
    games.groupby("spread_bucket")
    .agg(
        games=("home_covered", "count"),
        home_cover_pct=("home_covered", lambda x: x.mean() * 100),
        avg_margin=("ats_margin", "mean"),
    )
    .reset_index()
)

print("ATS Performance by Spread Size:")
print(ats_by_spread)

# Favorite vs underdog
games["favorite_covered"] = (
    (games["favorite"] == "Home") & games["home_covered"]
) | ((games["favorite"] == "Away") & ~games["home_covered"])

fav_cover_pct = games["favorite_covered"].mean() * 100
print(f"\nFavorite cover %: {fav_cover_pct:.1f}%")
print(f"Underdog cover %: {100 - fav_cover_pct:.1f}%")
Packages: nflfastR tidyverse nfl_data_py pandas numpy
First Half vs Full Game Analysis
Compare first half and full game betting results.
Intermediate
library(nflfastR)
library(tidyverse)

# ---- First half vs full game ----
# Derives halftime scores from play-by-play, then compares first-half and
# second-half margins with the final result of each completed 2023 game.

pbp <- load_pbp(2023)
# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2023)

# Calculate first half scores
# Play-by-play `home_score` / `away_score` carry the FINAL game score on
# every row, which made every second-half score come out as 0. The running
# totals are used instead; a score never decreases, so the maximum over
# quarters 1-2 is the halftime score.
# NOTE(review): assumes total_*_score already includes points scored on the
# row's own play - confirm against the nflfastR field descriptions.
first_half <- pbp %>%
  filter(qtr <= 2) %>%
  group_by(game_id) %>%
  summarize(
    home_1h = max(total_home_score, na.rm = TRUE),
    away_1h = max(total_away_score, na.rm = TRUE),
    .groups = "drop"
  )

# Get final scores
final_scores <- schedules %>%
  filter(!is.na(result)) %>%
  select(game_id, home_score, away_score, spread_line, result)

# Combine
game_halves <- final_scores %>%
  left_join(first_half, by = "game_id") %>%
  mutate(
    # Second-half points are the final score minus the halftime score
    # (overtime points, if any, are included here).
    home_2h = home_score - home_1h,
    away_2h = away_score - away_1h,
    result_1h = home_1h - away_1h,
    result_2h = home_2h - away_2h,

    # Compare halves
    home_stronger_2h = result_2h > result_1h,
    # TRUE when "home led at the half" matches "home won the game".
    halves_same_winner = (result_1h > 0) == (result > 0)
  )

# Analysis
half_analysis <- game_halves %>%
  summarize(
    games = n(),
    avg_1h_margin = mean(result_1h, na.rm = TRUE),
    avg_2h_margin = mean(result_2h, na.rm = TRUE),
    same_winner_pct = mean(halves_same_winner, na.rm = TRUE) * 100,
    home_stronger_2h_pct = mean(home_stronger_2h, na.rm = TRUE) * 100
  )

cat("First Half vs Second Half Analysis:\n")
print(half_analysis)
import nfl_data_py as nfl
import pandas as pd

# ---- First half vs full game ----
# Derives halftime scores from play-by-play, then compares first-half and
# second-half margins with the final result of each completed 2023 game.

pbp = nfl.import_pbp_data([2023])
schedules = nfl.import_schedules([2023])

# Calculate first half scores
# Play-by-play `home_score` / `away_score` carry the FINAL game score on
# every row, which made every second-half score come out as 0. The running
# totals are used instead; a score never decreases, so the maximum over
# quarters 1-2 is the halftime score.
# NOTE(review): assumes total_*_score already includes points scored on the
# row's own play - confirm against the nflfastR field descriptions.
first_half = (
    pbp[pbp["qtr"] <= 2]
    .groupby("game_id")
    .agg(
        home_1h=("total_home_score", "max"),
        away_1h=("total_away_score", "max"),
    )
    .reset_index()
)

# Get final scores
final_scores = schedules[schedules["result"].notna()][
    ["game_id", "home_score", "away_score", "spread_line", "result"]
]

# Combine
game_halves = final_scores.merge(first_half, on="game_id")
# Second-half points are the final score minus the halftime score
# (overtime points, if any, are included here).
game_halves["home_2h"] = game_halves["home_score"] - game_halves["home_1h"]
game_halves["away_2h"] = game_halves["away_score"] - game_halves["away_1h"]
game_halves["result_1h"] = game_halves["home_1h"] - game_halves["away_1h"]
game_halves["result_2h"] = game_halves["home_2h"] - game_halves["away_2h"]

# True when "home led at the half" matches "home won the game"
game_halves["same_winner"] = (game_halves["result_1h"] > 0) == (game_halves["result"] > 0)
game_halves["home_stronger_2h"] = game_halves["result_2h"] > game_halves["result_1h"]

# Analysis
half_analysis = {
    "games": len(game_halves),
    "avg_1h_margin": game_halves["result_1h"].mean(),
    "avg_2h_margin": game_halves["result_2h"].mean(),
    "same_winner_pct": game_halves["same_winner"].mean() * 100,
    "home_stronger_2h_pct": game_halves["home_stronger_2h"].mean() * 100,
}

print("First Half vs Second Half Analysis:")
print(pd.DataFrame([half_analysis]))
Packages: nflfastR tidyverse nfl_data_py pandas
Division Game Trends
Analyze betting patterns for division rivalry games.
Intermediate
library(nflfastR)
library(tidyverse)

# ---- Division vs non-division games ----
# Tags every completed game with both teams' divisions and compares
# straight-up, against-the-spread, and scoring results between the groups.

# load_schedules() is an nflreadr function (nflreadr is installed as an
# nflfastR dependency). It is namespace-qualified because this script only
# attaches nflfastR and tidyverse.
schedules <- nflreadr::load_schedules(2019:2023)

# Define divisions
# nflverse data codes the Rams as "LA" and the 2019 Raiders as "OAK". With
# only "LAR" / "LV" listed, those teams' games got no division and dropped
# out of the comparison. Older relocation codes (SD, STL) are listed too so
# the lookup keeps working if the season range is widened.
divisions <- list(
  AFC_East = c("BUF", "MIA", "NE", "NYJ"),
  AFC_North = c("BAL", "CIN", "CLE", "PIT"),
  AFC_South = c("HOU", "IND", "JAX", "TEN"),
  AFC_West = c("DEN", "KC", "LV", "OAK", "LAC", "SD"),
  NFC_East = c("DAL", "NYG", "PHI", "WAS"),
  NFC_North = c("CHI", "DET", "GB", "MIN"),
  NFC_South = c("ATL", "CAR", "NO", "TB"),
  NFC_West = c("ARI", "LA", "LAR", "STL", "SF", "SEA")
)

# Named vector mapping team code -> division name, built once.
division_lookup <- setNames(
  rep(names(divisions), lengths(divisions)),
  unlist(divisions, use.names = FALSE)
)

# Function to find division
# Accepts one team code or a whole vector of them; unknown codes give NA.
get_division <- function(team) {
  unname(division_lookup[team])
}

# Analyze division games
# get_division() is vectorised, so no rowwise() pass is needed.
div_games <- schedules %>%
  filter(!is.na(result), !is.na(spread_line)) %>%
  mutate(
    home_div = get_division(home_team),
    away_div = get_division(away_team),
    is_division_game = home_div == away_div
  )

# Compare division vs non-division
div_comparison <- div_games %>%
  group_by(is_division_game) %>%
  summarize(
    games = n(),
    home_win_pct = mean(result > 0) * 100,
    # spread_line is the number of points the home team is favored by, so
    # the home side covers when its margin exceeds the line. The original
    # `result + spread_line` had the sign reversed.
    home_cover_pct = mean(result - spread_line > 0) * 100,
    avg_total = mean(home_score + away_score),
    avg_margin = mean(abs(result)),
    .groups = "drop"
  )

cat("Division vs Non-Division Game Analysis:\n")
print(div_comparison)
import nfl_data_py as nfl
import pandas as pd

# ---- Division vs non-division games ----
# Tags every completed game with both teams' divisions and compares
# straight-up, against-the-spread, and scoring results between the groups.

schedules = nfl.import_schedules([2019, 2020, 2021, 2022, 2023])

# Define divisions
# nflverse data codes the Rams as "LA" and the 2019 Raiders as "OAK". With
# only "LAR" / "LV" listed, those teams got no division and their division
# games were misclassified. Older relocation codes (SD, STL) are listed too
# so the lookup keeps working if the season range is widened.
divisions = {
    "AFC_East": ["BUF", "MIA", "NE", "NYJ"],
    "AFC_North": ["BAL", "CIN", "CLE", "PIT"],
    "AFC_South": ["HOU", "IND", "JAX", "TEN"],
    "AFC_West": ["DEN", "KC", "LV", "OAK", "LAC", "SD"],
    "NFC_East": ["DAL", "NYG", "PHI", "WAS"],
    "NFC_North": ["CHI", "DET", "GB", "MIN"],
    "NFC_South": ["ATL", "CAR", "NO", "TB"],
    "NFC_West": ["ARI", "LA", "LAR", "STL", "SF", "SEA"],
}

# Flat team code -> division name mapping, built once.
_team_to_division = {
    team: div for div, teams in divisions.items() for team in teams
}


def get_division(team):
    """Return the division name for ``team``, or None for an unknown code."""
    return _team_to_division.get(team)


# Analyze division games
games = schedules[
    schedules["result"].notna() & schedules["spread_line"].notna()
].copy()
games["home_div"] = games["home_team"].apply(get_division)
games["away_div"] = games["away_team"].apply(get_division)
games["is_division_game"] = games["home_div"] == games["away_div"]

# Per-game flags computed as columns so the aggregation below needs no
# lambdas that reach back into `games` through the group's index.
games["home_win"] = games["result"] > 0
# spread_line is the number of points the home team is favored by, so the
# home side covers when its margin exceeds the line. The original
# `result + spread_line` had the sign reversed.
games["home_covered"] = (games["result"] - games["spread_line"]) > 0
games["total_points"] = games["home_score"] + games["away_score"]
games["abs_margin"] = games["result"].abs()

# Compare division vs non-division
div_comparison = (
    games.groupby("is_division_game")
    .agg(
        games=("result", "count"),
        home_win_pct=("home_win", "mean"),
        home_cover_pct=("home_covered", "mean"),
        avg_total=("total_points", "mean"),
        avg_margin=("abs_margin", "mean"),
    )
    .reset_index()
)
# The boolean means are fractions; report them as percentages.
div_comparison[["home_win_pct", "home_cover_pct"]] *= 100

div_comparison["is_division_game"] = div_comparison["is_division_game"].map(
    {True: "Division", False: "Non-Division"}
)
print("Division vs Non-Division Game Analysis:")
print(div_comparison)
Packages: nflfastR tidyverse nfl_data_py pandas
Quick Package Reference
R Packages
  • nflfastR - Play-by-play data with EPA
  • nflplotR - NFL team logos & plotting
  • tidyverse - Data manipulation & visualization
  • ggplot2 - Advanced visualizations
Python Packages
  • nfl_data_py - NFL data (nflverse compatible)
  • pandas - Data manipulation
  • matplotlib - Visualizations
  • scikit-learn - Machine learning

Ready to Dive Deeper?

Learn the theory behind these techniques in our comprehensive tutorial series

Browse Tutorials