GitHub link: https://github.com/olex2148/f1colab
library(tidyverse)
library(ggplot2)
library(data.table)
library(viridis)
#ggtheme assist used for first plot
# Load the Formula 1 tables of the TidyTuesday 2021-09-07 release.
# All files live in the same folder, so the shared prefix is spelled out once
# and each table is read by appending its file name.
tt_f1_base_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2021/2021-09-07/"
)
circuits <- fread(paste0(tt_f1_base_url, "circuits.csv"))
driver_standings <- fread(paste0(tt_f1_base_url, "driver_standings.csv"))
drivers <- fread(paste0(tt_f1_base_url, "drivers.csv"))
races <- fread(paste0(tt_f1_base_url, "races.csv"))
constructor_standings <- fread(
  paste0(tt_f1_base_url, "constructor_standings.csv")
)
results <- fread(paste0(tt_f1_base_url, "results.csv"))
constructor_results <- fread(paste0(tt_f1_base_url, "constructor_results.csv"))
constructors <- fread(paste0(tt_f1_base_url, "constructors.csv"))
# Plot 1: cumulative race wins against career race number for five drivers.
#
# Pipeline: standings rows are enriched with driver names, the per-race result
# and the race calendar, reduced to the five drivers of interest, ordered
# chronologically, and then a running win count is built per driver.
driver_standings %>%
  left_join(drivers, by = "driverId") %>% # unite names and points
  # Race result of the same driver in the same race. `position` exists in both
  # tables, so the race finishing position arrives as `position.y`.
  left_join(results, by = c("raceId", "driverId")) %>%
  left_join(races, by = "raceId") %>% # year and round of every race
  # Match complete "forename surname" pairs. Testing forename and surname
  # independently would also accept any cross-combination of the two lists.
  filter(
    paste(forename, surname) %in% c(
      "Lewis Hamilton", "Michael Schumacher", "Max Verstappen",
      "Sebastian Vettel", "Alain Prost"
    )
  ) %>%
  arrange(year, round) %>% # chronological order
  select(year, round, wins, position.y, surname) %>%
  group_by(surname) %>% # count races per driver, not overall
  mutate(
    # Row index within each driver = that driver's n-th row in the standings.
    race_number = row_number(),
    # `position.y` is a character column; comparing against "1" directly
    # avoids the coercion warning that as.numeric() raises on non-numeric
    # entries. Rows without a race result (NA) count as "no win".
    count_wins = if_else(position.y == "1", 1, 0, missing = 0),
    cumulative_wins = cumsum(count_wins)
  ) %>%
  ungroup() %>%
  ggplot() +
  # NOTE(review): `size` is kept for line width to stay compatible with older
  # ggplot2 releases; newer releases prefer `linewidth` -- confirm the target
  # version before switching.
  geom_line(
    aes(
      x = race_number, y = cumulative_wins,
      group = surname, colour = surname
    ),
    size = 1.5
  ) +
  # Unnamed values: colours are assigned in the order of the scale's levels.
  scale_colour_manual(
    values = c("darkturquoise", "red2", "pink1", "green3", "yellow3")
  ) +
  labs(
    title = "Cumulative driver F1 wins over number of races",
    x = "Race Number",
    y = "Cumulative Wins"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      colour = "white", family = "serif", size = 16, hjust = 0.5
    ),
    plot.subtitle = element_text(colour = "white", family = "serif", size = 14),
    axis.title = element_text(colour = "white", family = "serif"),
    axis.text = element_text(colour = "white", family = "serif"),
    legend.text = element_text(colour = "white", family = "serif", size = 12),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey25"),
    panel.background = element_rect(fill = "grey25", colour = "grey25"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Grid lines are blanked above and re-added here on purpose: this keeps the
  # original layering, which may not be equivalent to recolouring the grid of
  # theme_minimal() in place.
  theme(
    panel.grid.major = element_line(colour = "azure3"),
    panel.grid.minor = element_line(colour = "azure3")
  )
# Preview the first rows of driver_standings; the `##` lines that follow
# appear to be console output pasted from a previous run.
head(driver_standings)
## driverStandingsId raceId driverId points position positionText wins
## 1: 1 18 1 10 1 1 1
## 2: 2 18 2 8 2 2 0
## 3: 3 18 3 6 3 3 0
## 4: 4 18 4 5 4 4 0
## 5: 5 18 5 4 5 5 0
## 6: 6 18 6 3 6 6 0
# Show the column names and types of results; the `##` lines that follow
# appear to be console output pasted from a previous run. Note that `position`
# and `fastestLapSpeed` are listed there as character columns.
str(results)
## Classes 'data.table' and 'data.frame': 25220 obs. of 18 variables:
## $ resultId : int 1 2 3 4 5 6 7 8 9 10 ...
## $ raceId : int 18 18 18 18 18 18 18 18 18 18 ...
## $ driverId : int 1 2 3 4 5 6 7 8 9 10 ...
## $ constructorId : int 1 2 3 4 1 3 5 6 2 7 ...
## $ number : chr "22" "3" "7" "5" ...
## $ grid : int 1 5 7 11 3 13 17 15 2 18 ...
## $ position : chr "1" "2" "3" "4" ...
## $ positionText : chr "1" "2" "3" "4" ...
## $ positionOrder : int 1 2 3 4 5 6 7 8 9 10 ...
## $ points : num 10 8 6 5 4 3 2 1 0 0 ...
## $ laps : int 58 58 58 58 58 57 55 53 47 43 ...
## $ time : chr "1:34:50.616" "+5.478" "+8.163" "+17.181" ...
## $ milliseconds : chr "5690616" "5696094" "5698779" "5707797" ...
## $ fastestLap : chr "39" "41" "41" "58" ...
## $ rank : chr "2" "3" "5" "7" ...
## $ fastestLapTime : chr "1:27.452" "1:27.739" "1:28.090" "1:28.603" ...
## $ fastestLapSpeed: chr "218.300" "217.586" "216.719" "215.464" ...
## $ statusId : int 1 1 1 1 1 11 5 5 4 3 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Preview the first rows of races; the `##` lines that follow appear to be
# console output pasted from a previous run.
head(races)
## raceId year round circuitId name date time
## 1: 1 2009 1 1 Australian Grand Prix 2009-03-29 06:00:00
## 2: 2 2009 2 2 Malaysian Grand Prix 2009-04-05 09:00:00
## 3: 3 2009 3 17 Chinese Grand Prix 2009-04-19 07:00:00
## 4: 4 2009 4 3 Bahrain Grand Prix 2009-04-26 12:00:00
## 5: 5 2009 5 4 Spanish Grand Prix 2009-05-10 12:00:00
## 6: 6 2009 6 6 Monaco Grand Prix 2009-05-24 12:00:00
## url
## 1: http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix
## 2: http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix
## 3: http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix
## 4: http://en.wikipedia.org/wiki/2009_Bahrain_Grand_Prix
## 5: http://en.wikipedia.org/wiki/2009_Spanish_Grand_Prix
## 6: http://en.wikipedia.org/wiki/2009_Monaco_Grand_Prix
# Preview the first rows of drivers; the `##` lines that follow appear to be
# console output pasted from a previous run.
head(drivers)
## driverId driverRef number code forename surname dob nationality
## 1: 1 hamilton 44 HAM Lewis Hamilton 1985-01-07 British
## 2: 2 heidfeld \\N HEI Nick Heidfeld 1977-05-10 German
## 3: 3 rosberg 6 ROS Nico Rosberg 1985-06-27 German
## 4: 4 alonso 14 ALO Fernando Alonso 1981-07-29 Spanish
## 5: 5 kovalainen \\N KOV Heikki Kovalainen 1981-10-19 Finnish
## 6: 6 nakajima \\N NAK Kazuki Nakajima 1985-01-11 Japanese
## url
## 1: http://en.wikipedia.org/wiki/Lewis_Hamilton
## 2: http://en.wikipedia.org/wiki/Nick_Heidfeld
## 3: http://en.wikipedia.org/wiki/Nico_Rosberg
## 4: http://en.wikipedia.org/wiki/Fernando_Alonso
## 5: http://en.wikipedia.org/wiki/Heikki_Kovalainen
## 6: http://en.wikipedia.org/wiki/Kazuki_Nakajima
# Per-race lookup table: fastest-lap speed and race finishing position of each
# driver in each race. Keeping only these columns makes it easier to attach
# them to other data frames later. The finishing position is renamed so it
# cannot clash with the `position` column of driver_standings.
speed <- results %>%
  select(raceId, driverId, fastestLapSpeed, race_position = position)

# Join driver standings with races, drivers and the per-race lookup table, then
# keep only the rows where the driver won that race.
driver_results_df <- driver_standings %>%
  left_join(races, by = "raceId") %>%
  left_join(drivers, by = "driverId") %>%
  mutate(Driver = paste(forename, surname)) %>%
  # Join on both keys; joining on one key alone would generate duplicates.
  left_join(speed, by = c("driverId", "raceId")) %>%
  # Filter on the race result, not on driver_standings' `position`: the latter
  # is the position in the standings, so it would select the standings leader
  # after each race instead of the race winner.
  filter(race_position == "1")

# Drop rows without a usable speed: "\\N" is the NA equivalent in these files,
# and a real NA appears when no result row matched in the join.
driver_results_df2 <- driver_results_df %>%
  filter(!is.na(fastestLapSpeed), fastestLapSpeed != "\\N")

# Top10 holds the ten drivers with the most wins among the remaining rows;
# these are the only drivers included in the plot.
Top10 <- driver_results_df2 %>%
  count(Driver, sort = TRUE) %>%
  head(10)

# Convert the speed from character to numeric (as.integer() would truncate the
# decimals of values such as "218.300") and keep only the top-10 drivers.
driver_results_df2 <- driver_results_df2 %>%
  mutate(fastestLapSpeed = as.numeric(fastestLapSpeed)) %>%
  filter(Driver %in% Top10$Driver)
# Plot 2: distribution of fastest-lap speed per driver, drawn as horizontal
# violins (filled and outlined by nationality) with a narrow red boxplot on
# top of each violin.
driver_results_df2 %>%
  ggplot(aes(x = fastestLapSpeed, y = Driver)) +
  geom_violin(
    aes(fill = nationality, color = nationality),
    width = 0.5, size = 0.1
  ) +
  scale_fill_viridis(discrete = TRUE) +
  scale_color_viridis(discrete = TRUE) +
  geom_boxplot(width = 0.2, size = 0.05, alpha = 0.3, color = "red") +
  # NOTE(review): the subtitle year range is hard-coded; nothing in this block
  # restricts the data to these years -- confirm it matches the plotted rows.
  labs(
    title = "Max lap speed by driver when they won the race",
    subtitle = "2015-2021",
    fill = "nationality",
    color = "nationality",
    y = NULL,
    x = "Fastest Lap Avg Speed (KM/H)"
  ) +
  theme(
    plot.title = element_text(size = 16, color = "Black"),
    plot.subtitle = element_text(size = 14, color = "red"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )