Code Examples

Copy-paste ready R and Python code for NFL analytics. From data loading to machine learning models.

122 Examples
R & Python Support: All examples include both R and Python versions. Click the tabs to switch between languages. Use the copy button to copy code to clipboard.

Data Loading

Load NFL play-by-play data using nflfastR, nfl_data_py, and various APIs

Load Play-by-Play Data (nflfastR)
Load NFL play-by-play data for one or multiple seasons using the nflfastR package.
Beginner
# One-time setup, if the package is missing:
# install.packages("nflfastR")
library(nflfastR)
library(tidyverse)

# Play-by-play for the 2023 season only
pbp_2023 <- load_pbp(2023)

# Play-by-play for 2021 through 2023 in a single call
pbp_multi <- load_pbp(2021:2023)

# Column-by-column overview of the single-season data
glimpse(pbp_2023)

# Report the row and column counts from one dim() lookup
pbp_dims <- dim(pbp_2023)
cat("Total plays:", pbp_dims[1], "\n")
cat("Columns:", pbp_dims[2], "\n")
# Install: pip install nfl_data_py
import nfl_data_py as nfl
import pandas as pd

# Load single season
pbp_2023 = nfl.import_pbp_data([2023])

# Load multiple seasons
pbp_multi = nfl.import_pbp_data([2021, 2022, 2023])

# View structure. DataFrame.info() writes its report to stdout itself and
# returns None, so wrapping it in print() would emit a stray "None" line.
pbp_2023.info()

# Quick summary: shape is (rows, columns)
n_plays, n_columns = pbp_2023.shape
print(f"Total plays: {n_plays}")
print(f"Columns: {n_columns}")
Packages: nflfastR tidyverse nfl_data_py pandas
Load Roster Data
Get player roster information including positions, teams, and biographical data.
Beginner
library(nflfastR)
library(tidyverse)

# Roster for the 2023 season
rosters <- fast_scraper_roster(2023)

# Rosters covering the 2020 through 2023 seasons
rosters_multi <- fast_scraper_roster(2020:2023)

# Quarterbacks only, reduced to a handful of descriptive columns.
# NOTE(review): confirm every selected column (in particular `age`) exists in
# the roster data returned by the installed nflfastR version.
qb_rows <- filter(rosters, position == "QB")
qbs <- select(qb_rows, full_name, team, age, years_exp, draft_number)

head(qbs)
import nfl_data_py as nfl
import pandas as pd

# Roster for the 2023 season
# NOTE(review): confirm the installed nfl_data_py still exposes
# import_rosters; newer releases may name it differently.
rosters = nfl.import_rosters([2023])

# Rosters covering the 2020 through 2023 seasons
rosters_multi = nfl.import_rosters([2020, 2021, 2022, 2023])

# Quarterbacks only, reduced to a handful of descriptive columns
is_qb = rosters["position"] == "QB"
qb_columns = ["player_name", "team", "age", "years_exp", "draft_number"]
qbs = rosters.loc[is_qb, qb_columns]

print(qbs.head())
Packages: nflfastR tidyverse nfl_data_py pandas
Load Next Gen Stats
Access NFL Next Gen Stats data for advanced player tracking metrics.
Intermediate
library(nflfastR)
library(tidyverse)

# load_nextgen_stats() lives in nflreadr (installed as a dependency of
# nflfastR). library(nflfastR) does not attach it, so it is called through its
# namespace to avoid a "could not find function" error.

# Load Next Gen Stats - Passing
ngs_passing <- nflreadr::load_nextgen_stats(
  seasons = 2023,
  stat_type = "passing"
)

# Load Next Gen Stats - Rushing
ngs_rushing <- nflreadr::load_nextgen_stats(
  seasons = 2023,
  stat_type = "rushing"
)

# Load Next Gen Stats - Receiving
ngs_receiving <- nflreadr::load_nextgen_stats(
  seasons = 2023,
  stat_type = "receiving"
)

# View top passers by avg air yards
ngs_passing %>%
  filter(week == 0) %>%  # Season totals
  arrange(desc(avg_air_yards_to_sticks)) %>%
  select(player_display_name, team_abbr, avg_air_yards_to_sticks,
         completion_percentage_above_expectation) %>%
  head(10)
import nfl_data_py as nfl
import pandas as pd

# Next Gen Stats for 2023, one frame per stat family
ngs_passing = nfl.import_ngs_data(years=[2023], stat_type="passing")
ngs_rushing = nfl.import_ngs_data(years=[2023], stat_type="rushing")
ngs_receiving = nfl.import_ngs_data(years=[2023], stat_type="receiving")

# Ten passers with the highest average air yards to the sticks,
# restricted to the week-0 rows (season totals)
display_columns = [
    "player_display_name",
    "team_abbr",
    "avg_air_yards_to_sticks",
    "completion_percentage_above_expectation",
]
season_totals = ngs_passing.loc[ngs_passing["week"] == 0]
ranked = season_totals.nlargest(10, "avg_air_yards_to_sticks")
top_passers = ranked[display_columns]
print(top_passers)
Packages: nflfastR tidyverse nfl_data_py pandas
Load Combine Data
Access NFL Combine results for draft prospect evaluation.
Beginner
library(nflfastR)
library(tidyverse)

# Load combine data
# load_combine() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
combine <- nflreadr::load_combine()

# Filter recent years
combine_recent <- combine %>%
  filter(draft_year >= 2020)

# Top 40-yard dash times by position
# slice_min() keeps ties by default, so a position can return more than three
# rows when times are equal. ungroup() stops the grouping from leaking into
# later steps.
combine_recent %>%
  filter(!is.na(forty)) %>%
  group_by(pos) %>%
  slice_min(forty, n = 3) %>%
  ungroup() %>%
  select(draft_year, player_name, pos, forty, vertical, broad_jump) %>%
  arrange(pos, forty)
import nfl_data_py as nfl
import pandas as pd

# Load combine data
combine = nfl.import_combine_data()

# Filter recent years
combine_recent = combine[combine["draft_year"] >= 2020]

# Top 40-yard dash times by position
# Sorting and then taking the first three rows per group avoids
# groupby(...).apply(...) on the grouping column. That pattern is deprecated
# in recent pandas, and once the grouping column is excluded from apply(),
# reset_index(drop=True) would discard "pos" and the selection below would fail.
timed = combine_recent[combine_recent["forty"].notna()]
top_forty = (
    timed.sort_values(["pos", "forty"])
    .groupby("pos")
    .head(3)
    .reset_index(drop=True)
    [["draft_year", "player_name", "pos", "forty", "vertical", "broad_jump"]]
)
print(top_forty)
Packages: nflfastR tidyverse nfl_data_py pandas
Load Schedule and Game Results
Get NFL schedules, game results, and betting lines.
Beginner
library(nflfastR)
library(tidyverse)

# Load schedules with results
# load_schedules() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
schedules <- nflreadr::load_schedules(2020:2023)

# Filter to completed games
completed <- schedules %>%
  filter(!is.na(result))

# Calculate home win percentage
# `result` is the home score minus the away score: positive means a home win,
# negative an away win, zero a tie.
home_win_pct <- completed %>%
  summarize(
    home_wins = sum(result > 0),
    away_wins = sum(result < 0),
    ties = sum(result == 0),
    home_win_pct = home_wins / n()
  )

print(home_win_pct)

# Spread analysis
# nflverse schedules express `spread_line` from the home side: a positive
# value means the home team was favored by that many points. The home team
# therefore covers when its margin (`result`) exceeds the line. An exact match
# is a push and is counted here as a non-cover.
completed %>%
  filter(!is.na(spread_line)) %>%
  mutate(home_covered = result > spread_line) %>%
  summarize(
    home_cover_rate = mean(home_covered),
    avg_spread = mean(spread_line),
    avg_margin = mean(abs(result))
  )
import nfl_data_py as nfl
import pandas as pd

# Load schedules with results
schedules = nfl.import_schedules([2020, 2021, 2022, 2023])

# Filter to completed games
completed = schedules[schedules["result"].notna()]

# Calculate home win percentage
# "result" is the home score minus the away score: positive means a home win,
# negative an away win, zero a tie.
home_wins = (completed["result"] > 0).sum()
away_wins = (completed["result"] < 0).sum()
ties = (completed["result"] == 0).sum()
home_win_pct = home_wins / len(completed)

print(f"Home Win %: {home_win_pct:.1%}")

# Spread analysis
# .copy() gives an independent frame, so adding a column below does not
# trigger SettingWithCopyWarning on a filtered view.
with_spread = completed[completed["spread_line"].notna()].copy()

# nflverse schedules express "spread_line" from the home side: a positive
# value means the home team was favored by that many points. The home team
# therefore covers when its margin ("result") exceeds the line. An exact match
# is a push and is counted here as a non-cover.
with_spread["home_covered"] = with_spread["result"] > with_spread["spread_line"]

print(f"Home Cover Rate: {with_spread['home_covered'].mean():.1%}")
print(f"Avg Spread: {with_spread['spread_line'].mean():.1f}")
print(f"Avg Margin: {with_spread['result'].abs().mean():.1f}")
Packages: nflfastR tidyverse nfl_data_py pandas
Load Player Participation Data
Get play-level player participation data showing which players were on the field.
Intermediate
library(nflfastR)
library(tidyverse)

# Load participation data
# load_participation() lives in nflreadr (installed as a dependency of
# nflfastR). library(nflfastR) does not attach it, so it is called through its
# namespace.
participation <- nflreadr::load_participation(2023)

# View structure
glimpse(participation)

# Analyze snap counts by player
# `offense_players` holds a ";"-separated list per play; separate_rows()
# expands it to one row per listed player so each row counts as one snap.
snap_counts <- participation %>%
  separate_rows(offense_players, sep = ";") %>%
  filter(offense_players != "") %>%  # also drops NA entries
  group_by(offense_players) %>%
  summarize(
    snaps = n(),
    games = n_distinct(game_id)
  ) %>%
  arrange(desc(snaps))

head(snap_counts, 20)
import nfl_data_py as nfl
import pandas as pd

# Load participation data
# NOTE(review): confirm the installed nfl_data_py exposes
# import_participation; some releases may only provide participation through
# import_pbp_data.
participation = nfl.import_participation([2023])

# View structure. DataFrame.info() writes its report to stdout itself and
# returns None, so wrapping it in print() would emit a stray "None" line.
participation.info()

# Count offensive snaps (simplified)
# Note: participation data structure varies
print(f"Total plays with participation: {len(participation)}")
print(participation.head())
Packages: nflfastR tidyverse nfl_data_py pandas
Load Injuries Data
Access weekly injury reports and player injury history.
Beginner
library(nflfastR)
library(tidyverse)

# Load injuries data
# load_injuries() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
injuries <- nflreadr::load_injuries(2023)

# View structure
glimpse(injuries)

# Count injuries by team and status
# One row per team, one column per report status; combinations that never
# occur are filled with 0 rather than NA.
injury_summary <- injuries %>%
  group_by(team, report_status) %>%
  summarize(players = n(), .groups = "drop") %>%
  pivot_wider(names_from = report_status, values_from = players, values_fill = 0)

print(injury_summary)

# Find most common injury types
injuries %>%
  filter(!is.na(report_primary_injury)) %>%
  count(report_primary_injury, sort = TRUE) %>%
  head(10)
import nfl_data_py as nfl
import pandas as pd

# Load injuries data
injuries = nfl.import_injuries([2023])

# View structure. DataFrame.info() writes its report to stdout itself and
# returns None, so wrapping it in print() would emit a stray "None" line.
injuries.info()

# Count injuries by status (guarded in case the column is absent)
if "report_status" in injuries.columns:
    status_counts = injuries["report_status"].value_counts()
    print("Injury Status Distribution:")
    print(status_counts)

# Most common injuries (guarded in case the column is absent)
if "report_primary_injury" in injuries.columns:
    injury_types = injuries["report_primary_injury"].value_counts().head(10)
    print("\nMost Common Injuries:")
    print(injury_types)
Packages: nflfastR tidyverse nfl_data_py pandas
Load Draft Picks Historical
Access historical NFL Draft data for prospect analysis.
Beginner
library(nflfastR)
library(tidyverse)

# Load draft picks
# load_draft_picks() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
draft <- nflreadr::load_draft_picks()

# Filter to recent years
recent_draft <- draft %>%
  filter(season >= 2020)

# Analyze picks by position: count and mean pick number per season/position
position_analysis <- recent_draft %>%
  group_by(season, position) %>%
  summarize(
    picks = n(),
    avg_pick = mean(pick),
    .groups = "drop"
  )

# First round by position
first_round <- recent_draft %>%
  filter(round == 1) %>%
  count(position, sort = TRUE)

print(first_round)

# Top 10 picks each year
recent_draft %>%
  filter(pick <= 10) %>%
  select(season, pick, team, pfr_player_name, position) %>%
  arrange(season, pick)
import nfl_data_py as nfl
import pandas as pd

# Full draft history
draft = nfl.import_draft_picks()

# Keep the 2020 season onward
recent_draft = draft.loc[draft["season"].ge(2020)]

# How many first-round selections each position received
first_round = recent_draft.loc[recent_draft["round"].eq(1)]
position_counts = first_round["position"].value_counts()
print("First Round Picks by Position (2020+):")
print(position_counts)

# Picks 1-10 of every retained season, ordered by season then pick
top_pick_columns = ["season", "pick", "team", "pfr_player_name", "position"]
in_top_ten = recent_draft["pick"].le(10)
top_picks = recent_draft.loc[in_top_ten, top_pick_columns].sort_values(
    ["season", "pick"]
)
print("\nTop 10 Picks:")
print(top_picks)
Packages: nflfastR tidyverse nfl_data_py pandas
Load Snap Counts
Get weekly snap count data for all players.
Beginner
library(nflfastR)
library(tidyverse)

# Load snap counts
# load_snap_counts() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
snaps <- nflreadr::load_snap_counts(2023)

# View structure
glimpse(snaps)

# Top offensive snap leaders (players with at least 10 rows, i.e. games)
off_snaps <- snaps %>%
  group_by(player, position, team) %>%
  summarize(
    games = n(),
    total_off_snaps = sum(offense_snaps, na.rm = TRUE),
    avg_off_snaps = mean(offense_snaps, na.rm = TRUE),
    avg_off_pct = mean(offense_pct, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(games >= 10) %>%
  arrange(desc(total_off_snaps))

# Top snap share by position
# with_ties = FALSE caps the output at five rows per position. By default
# slice_max() keeps every row tied at the maximum, and many player-games share
# the same top snap percentage.
snaps %>%
  filter(position %in% c("QB", "RB", "WR", "TE")) %>%
  group_by(position) %>%
  slice_max(offense_pct, n = 5, with_ties = FALSE) %>%
  ungroup() %>%
  select(player, team, position, week, offense_snaps, offense_pct)
import nfl_data_py as nfl
import pandas as pd

# Load snap counts
snaps = nfl.import_snap_counts([2023])

# View structure. DataFrame.info() writes its report to stdout itself and
# returns None, so wrapping it in print() would emit a stray "None" line.
snaps.info()

# Top offensive snap leaders: one row per player/position/team with the
# number of weekly rows, total and mean offensive snaps, and mean snap share
off_snaps = (snaps.groupby(["player", "position", "team"])
    .agg(
        games=("week", "count"),
        total_off_snaps=("offense_snaps", "sum"),
        avg_off_snaps=("offense_snaps", "mean"),
        avg_off_pct=("offense_pct", "mean")
    )
    .reset_index())

# Keep players with at least 10 games, highest snap totals first
off_snaps = off_snaps[off_snaps["games"] >= 10].sort_values(
    "total_off_snaps", ascending=False)

print("Top Offensive Snap Leaders:")
print(off_snaps.head(20))
Packages: nflfastR tidyverse nfl_data_py pandas
Load Win Totals and Futures
Compute each team's actual win total from schedule results, for comparison against preseason win total lines in betting analysis.
Intermediate
library(nflfastR)
library(tidyverse)

# Load schedules for win totals context
# load_schedules() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
schedules <- nflreadr::load_schedules(2023)

# Calculate actual wins per team
# Win totals are a regular-season market, so playoff games are excluded.
# Each game is reshaped into two rows (one per team) and credited 1 for a win,
# 0.5 for a tie and 0 for a loss, based on `result` (home minus away score).
actual_wins <- schedules %>%
  filter(!is.na(result), game_type == "REG") %>%
  pivot_longer(
    cols = c(home_team, away_team),
    names_to = "location",
    values_to = "team"
  ) %>%
  mutate(
    win = case_when(
      location == "home_team" & result > 0 ~ 1,
      location == "away_team" & result < 0 ~ 1,
      result == 0 ~ 0.5,
      TRUE ~ 0
    )
  ) %>%
  group_by(team) %>%
  summarize(
    games = n(),
    wins = sum(win),
    # Count losses and ties separately so a tie is not reported as half a loss
    losses = sum(win == 0),
    ties = sum(win == 0.5)
  ) %>%
  arrange(desc(wins))

print(actual_wins)
import nfl_data_py as nfl
import pandas as pd

# Load schedules
schedules = nfl.import_schedules([2023])

# Filter to completed regular-season games. Win totals are a regular-season
# market, so playoff games are excluded.
completed = schedules[
    schedules["result"].notna() & (schedules["game_type"] == "REG")
]

# "result" is the home score minus the away score. Each game credits 1 to the
# winner and 0.5 to both teams on a tie.
tie_credit = completed["result"].eq(0) * 0.5
home_credit = completed["result"].gt(0) + tie_credit
away_credit = completed["result"].lt(0) + tie_credit

# Calculate wins for home teams
home_wins = home_credit.groupby(completed["home_team"]).sum()

# Calculate wins for away teams
away_wins = away_credit.groupby(completed["away_team"]).sum()

# Combine. fill_value=0 keeps a team that has only home or only away games so
# far; an inner merge would silently drop it.
wins = (
    home_wins.add(away_wins, fill_value=0)
    .rename_axis("team")
    .reset_index(name="total_wins")
    .sort_values("total_wins", ascending=False)
)

print("Team Win Totals:")
print(wins)
Packages: nflfastR tidyverse nfl_data_py pandas
Load and Merge Multiple Data Sources
Combine play-by-play data with roster data.
Advanced
library(nflfastR)
library(tidyverse)

# Load multiple data sources
pbp <- load_pbp(2023)
rosters <- fast_scraper_roster(2023)

# Get passer stats with roster info
# One row per passer. `attempts` counts every play that has a passer id.
# na.rm = TRUE is applied to every sum so a single missing value cannot turn
# a player's total into NA.
passer_stats <- pbp %>%
  filter(!is.na(passer_player_id)) %>%
  group_by(passer_player_id, passer_player_name) %>%
  summarize(
    attempts = n(),
    completions = sum(complete_pass, na.rm = TRUE),
    yards = sum(passing_yards, na.rm = TRUE),
    tds = sum(pass_touchdown, na.rm = TRUE),
    ints = sum(interception, na.rm = TRUE),
    epa = sum(epa, na.rm = TRUE),
    .groups = "drop"
  )

# Merge with roster data (left join keeps passers without a roster match)
# NOTE(review): confirm every selected roster column (in particular `age`)
# exists in the data returned by the installed nflfastR version.
passer_with_roster <- passer_stats %>%
  left_join(
    rosters %>% select(gsis_id, full_name, team, age, height, weight, college),
    by = c("passer_player_id" = "gsis_id")
  )

# View combined data: passers with at least 100 attempts, best total EPA first
passer_with_roster %>%
  filter(attempts >= 100) %>%
  select(passer_player_name, team, age, college, attempts, yards, tds, epa) %>%
  arrange(desc(epa))
import nfl_data_py as nfl
import pandas as pd

# Pull the two sources to be combined
pbp = nfl.import_pbp_data([2023])
rosters = nfl.import_rosters([2023])

# Per-passer totals over every play that has a passer id
stat_specs = {
    "attempts": ("play_id", "count"),
    "completions": ("complete_pass", "sum"),
    "yards": ("passing_yards", "sum"),
    "tds": ("pass_touchdown", "sum"),
    "ints": ("interception", "sum"),
    "epa": ("epa", "sum"),
}
passer_plays = pbp[pbp["passer_player_id"].notna()]
passer_stats = (
    passer_plays.groupby(["passer_player_id", "passer_player_name"])
    .agg(**stat_specs)
    .reset_index()
)

# Attach roster details; a left merge keeps passers without a roster match
roster_info = rosters[["gsis_id", "player_name", "team", "age", "college"]]
passer_with_roster = passer_stats.merge(
    roster_info,
    how="left",
    left_on="passer_player_id",
    right_on="gsis_id",
)

# Passers with at least 100 attempts, highest total EPA first
report_columns = [
    "passer_player_name", "team", "age", "college",
    "attempts", "yards", "tds", "epa",
]
qualified = passer_with_roster[passer_with_roster["attempts"] >= 100]
result = qualified.sort_values("epa", ascending=False)[report_columns]
print(result)
Packages: nflfastR tidyverse nfl_data_py pandas
Load QBR and Advanced Passing Metrics
Access ESPN QBR and other advanced quarterback metrics.
Intermediate
library(nflfastR)
library(tidyverse)

# load_espn_qbr() lives in nflreadr (installed as a dependency of nflfastR).
# library(nflfastR) does not attach it, so it is called through its namespace.
# The `league` argument is omitted: "nfl" is the only value this example needs.
# NOTE(review): recent nflreadr releases appear to have dropped `league`, and
# older ones defaulted it to "nfl" — confirm against the installed version.

# Load QBR data
qbr <- nflreadr::load_espn_qbr(
  seasons = 2023,
  summary_type = "season"
)

# View structure
glimpse(qbr)

# Top QBs by Total QBR
qbr %>%
  arrange(desc(qbr_total)) %>%
  select(name_display, team_abb, qbr_total, pts_added, pass_rating) %>%
  head(15)

# Weekly QBR trends
qbr_weekly <- nflreadr::load_espn_qbr(
  seasons = 2023,
  summary_type = "week"
)

# Best single-game performances
qbr_weekly %>%
  arrange(desc(qbr_total)) %>%
  select(name_display, week, team_abb, qbr_total, pts_added) %>%
  head(10)
import nfl_data_py as nfl
import pandas as pd

# Load QBR data
qbr = nfl.import_qbr([2023])

# View structure. DataFrame.info() writes its report to stdout itself and
# returns None, so wrapping it in print() would emit a stray "None" line.
qbr.info()

# Top QBs by QBR (if available); otherwise show what the frame does contain
if "qbr_total" in qbr.columns:
    top_qbr = qbr.nlargest(15, "qbr_total")[
        ["name_display", "team_abb", "qbr_total", "pts_added", "pass_rating"]
    ]
    print("Top QBs by Total QBR:")
    print(top_qbr)
else:
    print("Available columns:", qbr.columns.tolist())
    print(qbr.head())
Packages: nflfastR tidyverse nfl_data_py pandas
Quick Package Reference
R Packages
  • nflfastR - Play-by-play data with EPA
  • nflplotR - NFL team logos & plotting
  • tidyverse - Data manipulation & visualization
  • ggplot2 - Advanced visualizations
Python Packages
  • nfl_data_py - NFL data (nflverse compatible)
  • pandas - Data manipulation
  • matplotlib - Visualizations
  • scikit-learn - Machine learning

Ready to Dive Deeper?

Learn the theory behind these techniques in our comprehensive tutorial series

Browse Tutorials