GitHub link: https://github.com/olex2148/f1colab

library(tidyverse)
library(ggplot2)
library(data.table)
library(viridis)

# ggThemeAssist was used to style the first plot
# Folder that holds the TidyTuesday 2021-09-07 Formula 1 CSV files
tt_f1_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2021/2021-09-07/"
)

# Read one table of the data set, given its file name without the extension
read_f1 <- function(table_name) {
  fread(paste0(tt_f1_url, table_name, ".csv"))
}

# One data.table per CSV file, named after the file it comes from
circuits <- read_f1("circuits")
driver_standings <- read_f1("driver_standings")
drivers <- read_f1("drivers")
races <- read_f1("races")
constructor_standings <- read_f1("constructor_standings")
results <- read_f1("results")
constructor_results <- read_f1("constructor_results")
constructors <- read_f1("constructors")

Cumulative wins of five top drivers by number of races
(Who is the GOAT?)

# Cumulative race wins of five drivers, plotted against the running number of
# races each of them has a result for.

# Drivers of interest. Forename and surname are matched as a pair so that a
# relative or namesake sharing only one of the two names is not picked up.
goat_drivers <- c(
  "Lewis Hamilton", "Michael Schumacher", "Max Verstappen",
  "Sebastian Vettel", "Alain Prost"
)

# Line colour per surname. The pairing is the one the previous unnamed colour
# vector produced (values handed out to the surnames in sorted order); naming
# the entries keeps it stable if the set of drivers changes.
goat_colours <- c(
  Hamilton = "darkturquoise",
  Prost = "red2",
  Schumacher = "pink1",
  Verstappen = "green3",
  Vettel = "yellow3"
)

goat_wins <- driver_standings %>%
  left_join(drivers, by = "driverId") %>%             # adds driver names
  # Race result of the driver in that race. Both tables have a `position`
  # column, so the race position ends up in `position.y`. An inner join is
  # used on purpose: a left join would keep standings rows that have no result
  # for that race and row_number() below would count them as races.
  inner_join(results, by = c("raceId", "driverId")) %>%
  left_join(races, by = "raceId") %>%                 # adds year and round
  filter(paste(forename, surname) %in% goat_drivers) %>%
  arrange(year, round) %>%                            # chronological order
  select(year, round, position.y, surname) %>%
  group_by(surname) %>%                               # count per driver
  mutate(
    # Running index of the driver's races, in chronological order
    race_number = row_number(),
    # 1 for a win, 0 otherwise. The position is compared as text so that
    # non-numeric entries such as "\\N" need no coercion; NA counts as no win.
    count_wins = if_else(position.y == "1", 1, 0, missing = 0),
    cumulative_wins = cumsum(count_wins)
  ) %>%
  ungroup()

# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept until the minimum ggplot2 version is confirmed.
ggplot(goat_wins) +
  geom_line(
    aes(
      x = race_number, y = cumulative_wins,
      group = surname, colour = surname
    ),
    size = 1.5
  ) +
  scale_colour_manual(values = goat_colours) +
  labs(
    title = "Cumulative driver F1 wins over number of races",
    x = "Race Number",
    y = "Cumulative Wins"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      colour = "white", family = "serif", size = 16, hjust = 0.5
    ),
    plot.subtitle = element_text(colour = "white", family = "serif", size = 14),
    axis.title = element_text(colour = "white", family = "serif"),
    axis.text = element_text(colour = "white", family = "serif"),
    legend.text = element_text(colour = "white", family = "serif", size = 12),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey25"),
    panel.background = element_rect(fill = "grey25", colour = "grey25"),
    # Grid lines are blanked here and re-created in the next theme() call, so
    # that no grid property set by theme_minimal() is merged into the new lines.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(
    panel.grid.major = element_line(colour = "azure3"),
    panel.grid.minor = element_line(colour = "azure3")
  )


Second Plot


Violin plot with max speed per lap for the best drivers in F1


Four data frames are used for this plot: driver_standings, results, races and drivers

# Preview the first rows of the driver standings table
head(driver_standings)
##    driverStandingsId raceId driverId points position positionText wins
## 1:                 1     18        1     10        1            1    1
## 2:                 2     18        2      8        2            2    0
## 3:                 3     18        3      6        3            3    0
## 4:                 4     18        4      5        4            4    0
## 5:                 5     18        5      4        5            5    0
## 6:                 6     18        6      3        6            6    0
# Show the column names and types of the race results table
str(results)
## Classes 'data.table' and 'data.frame':   25220 obs. of  18 variables:
##  $ resultId       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ raceId         : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ driverId       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ constructorId  : int  1 2 3 4 1 3 5 6 2 7 ...
##  $ number         : chr  "22" "3" "7" "5" ...
##  $ grid           : int  1 5 7 11 3 13 17 15 2 18 ...
##  $ position       : chr  "1" "2" "3" "4" ...
##  $ positionText   : chr  "1" "2" "3" "4" ...
##  $ positionOrder  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ points         : num  10 8 6 5 4 3 2 1 0 0 ...
##  $ laps           : int  58 58 58 58 58 57 55 53 47 43 ...
##  $ time           : chr  "1:34:50.616" "+5.478" "+8.163" "+17.181" ...
##  $ milliseconds   : chr  "5690616" "5696094" "5698779" "5707797" ...
##  $ fastestLap     : chr  "39" "41" "41" "58" ...
##  $ rank           : chr  "2" "3" "5" "7" ...
##  $ fastestLapTime : chr  "1:27.452" "1:27.739" "1:28.090" "1:28.603" ...
##  $ fastestLapSpeed: chr  "218.300" "217.586" "216.719" "215.464" ...
##  $ statusId       : int  1 1 1 1 1 11 5 5 4 3 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Preview the first rows of the races table
head(races)
##    raceId year round circuitId                  name       date     time
## 1:      1 2009     1         1 Australian Grand Prix 2009-03-29 06:00:00
## 2:      2 2009     2         2  Malaysian Grand Prix 2009-04-05 09:00:00
## 3:      3 2009     3        17    Chinese Grand Prix 2009-04-19 07:00:00
## 4:      4 2009     4         3    Bahrain Grand Prix 2009-04-26 12:00:00
## 5:      5 2009     5         4    Spanish Grand Prix 2009-05-10 12:00:00
## 6:      6 2009     6         6     Monaco Grand Prix 2009-05-24 12:00:00
##                                                        url
## 1: http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix
## 2:  http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix
## 3:    http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix
## 4:    http://en.wikipedia.org/wiki/2009_Bahrain_Grand_Prix
## 5:    http://en.wikipedia.org/wiki/2009_Spanish_Grand_Prix
## 6:     http://en.wikipedia.org/wiki/2009_Monaco_Grand_Prix
# Preview the first rows of the drivers table
head(drivers)
##    driverId  driverRef number code forename    surname        dob nationality
## 1:        1   hamilton     44  HAM    Lewis   Hamilton 1985-01-07     British
## 2:        2   heidfeld    \\N  HEI     Nick   Heidfeld 1977-05-10      German
## 3:        3    rosberg      6  ROS     Nico    Rosberg 1985-06-27      German
## 4:        4     alonso     14  ALO Fernando     Alonso 1981-07-29     Spanish
## 5:        5 kovalainen    \\N  KOV   Heikki Kovalainen 1981-10-19     Finnish
## 6:        6   nakajima    \\N  NAK   Kazuki   Nakajima 1985-01-11    Japanese
##                                               url
## 1:    http://en.wikipedia.org/wiki/Lewis_Hamilton
## 2:     http://en.wikipedia.org/wiki/Nick_Heidfeld
## 3:      http://en.wikipedia.org/wiki/Nico_Rosberg
## 4:   http://en.wikipedia.org/wiki/Fernando_Alonso
## 5: http://en.wikipedia.org/wiki/Heikki_Kovalainen
## 6:   http://en.wikipedia.org/wiki/Kazuki_Nakajima
# Fastest-lap speed per race and driver, kept as a small lookup table so it can
# be joined onto other data frames by raceId and driverId.
speed <- results[, c("raceId", "driverId", "fastestLapSpeed")]

# Race winners: result rows whose finishing position is "1" (`position` is a
# character column in `results`).
race_winners <- results %>%
  filter(position == "1") %>%
  select(raceId, driverId)

# One row per race win, with race details, driver details, a full driver name
# and the winner's fastest-lap speed in that race.
driver_results_df <- driver_standings %>%
  # Keep only the driver/race pairs that are race wins. The `position` column
  # of driver_standings is the standings position, so filtering on it would
  # select the standings leader after each race instead of the race winner.
  semi_join(race_winners, by = c("raceId", "driverId")) %>%
  left_join(races, by = "raceId") %>%
  left_join(drivers, by = "driverId") %>%
  mutate(Driver = paste(forename, surname)) %>%
  # Join on both keys; joining on one key alone would multiply the rows
  left_join(speed, by = c("driverId", "raceId"))


# Drop rows without a usable fastest-lap speed: "\\N" is the data set's marker
# for a missing value, and NA appears where the speed join found no match.
# filter() removes both regardless of whether the input is a data.table or a
# plain data.frame.
driver_results_df2 <- driver_results_df %>%
  filter(!is.na(fastestLapSpeed), fastestLapSpeed != "\\N")

# Top10: the ten drivers with the most remaining rows in driver_results_df2;
# only these drivers are plotted.
Top10 <- driver_results_df2 %>%
  count(Driver, sort = TRUE) %>%
  slice_head(n = 10)

# Convert the speed from character to numeric. as.numeric() keeps the decimals
# (e.g. "218.300"), which as.integer() would truncate.
driver_results_df2$fastestLapSpeed <-
  as.numeric(driver_results_df2$fastestLapSpeed)

# Keep only the rows of the top 10 drivers
driver_results_df2 <- driver_results_df2 %>%
  filter(Driver %in% Top10$Driver)


# Violin plot (with an overlaid box plot) of the fastest-lap speed per driver,
# coloured by the driver's nationality.

# Season range shown in the subtitle, taken from the plotted data so the label
# cannot drift away from the rows that are actually drawn.
speed_years <- paste(range(driver_results_df2$year), collapse = "-")

driver_results_df2 %>%
  ggplot(aes(x = fastestLapSpeed, y = Driver)) +
  geom_violin(
    aes(fill = nationality, color = nationality),
    width = 0.5, size = 0.1
  ) +
  scale_fill_viridis(discrete = TRUE) +
  scale_color_viridis(discrete = TRUE) +
  # Thin, semi-transparent box plot drawn on top of each violin
  geom_boxplot(width = 0.2, size = 0.05, alpha = 0.3, color = "red") +
  labs(
    title = "Max lap speed by driver when they won the race",
    subtitle = speed_years,
    fill = "nationality",
    color = "nationality",
    y = NULL,
    x = "Fastest Lap Avg Speed (KM/H)"
  ) +
  theme(
    plot.title = element_text(size = 16, color = "Black"),
    plot.subtitle = element_text(size = 14, color = "red"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )