Science of The Super Bowl

By Jesse

(This article was first published on R – Jesse Piburn, and kindly contributed to R-bloggers)

A couple days ago, I participated in a Science of the Super Bowl Panel discussion organized by Newswise. I was asked to give a 5 (which turned more into about 10) minute overview, so I focused on answering 3 questions.

  • What is data science?
  • How is data science used in the NFL?
  • How might data science affect the outcome of the Super Bowl?

For my talk (around 21:45 mark) I made some visuals, so I thought I would recreate one here and include the R code. Here is the finished product, below is the code to reproduce it. Here is a higher res copy, that is actually readable.

######################################################
# RB direction charts
# 2/1/2017
#
#
######################################################

library(nflscrapR)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(gtable)
library(grid)
library(gridExtra)


# download all paly by play data for 2016 including playoffs -----
# this is going to take a while
reg_games <- extracting_gameids(2016, playoffs = FALSE)
post_games <- extracting_gameids(2016, playoffs = TRUE)
all_games <- c(reg_games, post_games)

game_list <- lapply(all_games, game_play_by_play)

s16 <- bind_rows(game_list)


# calculate run percentages -----
rundf <- s16 %>% filter(PlayType %in% c("Run") & 
                          !is.na(down) & 
                          posteam %in% c("NE", "ATL")) %>%
  mutate(depth = ifelse(Yards.Gained <= 0, "Negative", "Short"),
         depth = ifelse(Yards.Gained >= 5, "Middle", depth),
         depth = ifelse(Yards.Gained >= 10, "Deep", depth)) %>%
  group_by(posteam, Rusher) %>%
  mutate(down_n = n()) %>% 
  group_by(posteam, Rusher, RunLocation, RunGap, depth) %>%
  summarise(play_per = (n() / mean(down_n)) * 100,
            loc_att = n(),
            total_att = mean(down_n))

# just a little clean up -----
rundf[is.na(rundf$RunLocation), "RunLocation"] <- "middle"
rundf[rundf$RunLocation == "middle", "RunGap"] <- ""

rundf$runplace <- stringr::str_trim(paste(rundf$RunLocation, rundf$RunGap))

rundf$runplace <- factor(rundf$runplace, c("left end", "left tackle", 
                                           "left guard", "middle", 
                                           "right guard", "right tackle", 
                                           "right end"))

rundf$depth <- factor(rundf$depth, c("Negative", "Short", "Middle", "Deep"))

# not all RBs will have runs to all locations, so make a full data frame -----
# and join to it before charting
fulldf <- expand.grid("runplace" =c("left end", "left tackle", "left guard", 
                                    "middle", "right guard", "right tackle", 
                                    "right end"), 
                      "depth" = c("Negative", "Short", "Middle", "Deep"))


# Devonta Freeman -----
freemandf <- rundf %>% filter(Rusher == "D.Freeman") %>% 
  right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))

freeman_plot <- ggplot(freemandf, aes(x = runplace, y = depth)) + 
  geom_tile(aes(fill = play_per),colour = "white") +
  scale_fill_gradient(high = "#ca0020", low = "#0571b0", 
                      limits = c(0,15), guide_colourbar(title = "%")) + 
  theme_fivethirtyeight() +
    labs(title = "Devonta Freeman")


  
# Dion Lewis -----
lewisdf <- rundf %>% filter(Rusher == "D.Lewis") %>% 
  right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))

  
lewis_plot <-  ggplot(lewisdf, aes(x = runplace, y = depth)) + 
  geom_tile(aes(fill = play_per),colour = "white") +
  scale_fill_gradient(high = "#ca0020", low = "#0571b0", 
                      limits = c(0,30), guide_colourbar(title = "%")) + 
  theme_fivethirtyeight() +
    labs(title = "Dion Lewis")


  
# blount -----  
blountdf <- rundf %>% filter(Rusher == "L.Blount") %>% 
  right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))
  
  
blount_plot<-  ggplot(blountdf, aes(x = runplace, y = depth)) + 
    geom_tile(aes(fill = play_per),colour = "white") + 
    scale_fill_gradient(high = "#ca0020", low = "#0571b0", 
                        limits = c(0,15), guide_colourbar(title = "%")) + 
  theme_fivethirtyeight() +
    labs(title = "LeGarrette Blount")



# colemen -----  
colemandf <- rundf %>% filter(Rusher == "T.Coleman") %>% 
    right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))
  

coleman_plot <- ggplot(colemandf, aes(x = runplace, y = depth)) + 
    geom_tile(aes(fill = play_per),colour = "white") + 
    scale_fill_gradient(high = "#ca0020", low = "#0571b0",
                        limits = c(0,15), guide_colourbar(title = "%")) + 
  theme_fivethirtyeight() +
    labs(title = "Tevin Coleman")



# combine plots and add text -----

p1 <- arrangeGrob(freeman_plot, coleman_plot,
                    blount_plot, lewis_plot)

# sub title ----
titleback <- rectGrob(gp=gpar(fill = "#F0F0F0", col = NA))
titlesub <- textGrob("2016 Run Tendency for Super Bowl Running Backs", 
                     gp = gpar(fontsize = 12, fontface = "bold", 
                               fontfamily = "sans", col = "#3C3C3C"),
                     just = "left", x = unit(0.01, "npc"))

padding <- unit(5,"mm")

p1 <- gtable_add_rows(p1, heights = grobHeight(titlesub) + padding, pos = 0)
p1 <- gtable_add_grob(p1, grobTree(titleback, titlesub), 1, 1, 1, ncol(p1))

# title ----
title <- textGrob("Which Way Did he Go?", 
                  gp = gpar(fontsize = 18, fontface = "bold",
                            fontfamily = "sans", col = "#3C3C3C"),
                  just = "left", x = unit(0.01, "npc"))

p1 <- gtable_add_rows(p1, heights = grobHeight(title) + padding, pos = 0)
p1 <- gtable_add_grob(p1, grobTree(titleback, title), 1, 1, 1, ncol(p1))

# bottom ----
subtext <- textGrob("Source: @JesseOPiburn",
                    gp = gpar(fontsize = 10, fontface = "bold.italic",
                              fontfamily = "sans", col = "#3C3C3C"),
                    just = "left", x = unit(0.01, "npc"))

p1 <- gtable_add_rows(p1, heights = grobHeight(subtext) + padding, pos = -1)
p1 <- gtable_add_grob(p1, grobTree(titleback,subtext), t = nrow(p1), l = 1, 
                      b = nrow(p1), r = 2)
grid.draw(p1)

ggsave(plot = p1, filename = "plots/rb direction.png", 
       width = 18, height = 10, units = "in", dpi = 600)

To leave a comment for the author, please follow the link and comment on their blog: R – Jesse Piburn.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more…

Source:: R News

Leave a Reply

Your email address will not be published. Required fields are marked *

Time limit is exhausted. Please reload CAPTCHA.