Eurovision data

We chose to investigate 2022 data from Tanya Shapiro about the Eurovision Song Contest: https://github.com/tashapiro/eurovision-contest.

# Packages
  library(tidyr)
  library(dplyr)
  library(ggplot2)

# Set theme for all following graphs
  theme_set(theme_bw())

# Read in two datasets and create dataframes
  # tuesdata <-   tidytuesdayR::tt_load('2022-05-17')
  # eurovision <- tuesdata[["eurovision"]]
  # votes <-      tuesdata[["eurovision-votes"]]
  # save(eurovision, votes, file = "/Users/au734169/Documents/01_PhD/Advanced_R/eurovision.rda")
  load(file = "/Users/au734169/Documents/01_PhD/Advanced_R/eurovision.rda")
  
# Explore data
  knitr::kable(votes[1:10,])
year semi_final edition jury_or_televoting from_country to_country points duplicate
1975 f 1975f J Belgium Belgium 0 x
1975 f 1975f J Belgium Finland 0 NA
1975 f 1975f J Belgium France 2 NA
1975 f 1975f J Belgium Germany 0 NA
1975 f 1975f J Belgium Ireland 12 NA
1975 f 1975f J Belgium Israel 1 NA
1975 f 1975f J Belgium Italy 6 NA
1975 f 1975f J Belgium Luxembourg 0 NA
1975 f 1975f J Belgium Malta 7 NA
1975 f 1975f J Belgium Monaco 0 NA
  knitr::kable(eurovision[1:10,])
event host_city year host_country event_url section artist song artist_url image_url artist_country country_emoji running_order total_points rank rank_ordinal qualified winner
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Kalush Orchestra Stefania https://eurovision.tv/participant/kalush-orchestra-22 https://static.eurovision.tv/hb-cgi/images/963164d0-060d-4667-96b6-0b0cf1f797e6/avatar_small.jpg Ukraine :flag_ua: 6 337 1 1st TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final S10 De Diepte https://eurovision.tv/participant/s10-22 https://static.eurovision.tv/hb-cgi/images/96316719-75b2-415a-a60d-1c2dbdbd7eee/avatar_small.jpg Netherlands :flag_nl: 8 221 2 2nd TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Amanda Georgiadi Tenfjord Die Together https://eurovision.tv/participant/amanda-georgiadi-tenfjord-22 https://static.eurovision.tv/hb-cgi/images/9633696f-e238-4d32-8c27-c99ee0cecb6c/avatar_small.jpg Greece :flag_gr: 15 211 3 3rd TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final MARO Saudade, Saudade https://eurovision.tv/participant/maro-22 https://static.eurovision.tv/hb-cgi/images/96336bae-9891-466a-b723-a3281c80f61d/avatar_small.jpg Portugal :flag_pt: 10 208 4 4th TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Intelligent Music Project Intention https://eurovision.tv/participant/intelligent-music-project-22 https://static.eurovision.tv/hb-cgi/images/963166b3-81d6-409e-8a0a-12c176e1156d/avatar_small.jpg Bulgaria :flag_bg: 7 29 16 16th FALSE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final LPS Disko https://eurovision.tv/participant/lps-22 https://static.eurovision.tv/hb-cgi/images/96316379-9142-4117-a56c-bffa8aaf1011/avatar_small.jpg Slovenia :flag_si: 5 15 17 17th FALSE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Rosa Linn Snap https://eurovision.tv/participant/rosa-linn-22 https://static.eurovision.tv/hb-cgi/images/96336d8d-6b40-42b1-b25f-f99eafffc244/avatar_small.jpg Armenia :flag_am: 17 187 5 5th TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Subwoolfer Give That Wolf A Banana https://eurovision.tv/participant/subwoolfer-22 https://static.eurovision.tv/hb-cgi/images/96336df0-62dc-45a6-85ed-84ad27da5f3d/avatar_small.jpg Norway :flag_no: 16 177 6 6th TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Monika Liu Sentimentai https://eurovision.tv/participant/monika-liu-22 https://static.eurovision.tv/hb-cgi/images/96301c95-81f1-444c-a503-b7dad4df419b/avatar_small.jpg Lithuania :flag_lt: 3 159 7 7th TRUE FALSE
Turin 2022 Turin 2022 Italy https://eurovision.tv/event/turin-2022 first-semi-final Zdob şi Zdub & Advahov Brothers Trenulețul https://eurovision.tv/participant/zdob-si-zdub-and-fratii-advahov-22 https://static.eurovision.tv/hb-cgi/images/96318be3-1473-4ad6-8270-e18043886fd9/avatar_small.jpg Moldova :flag_md: 9 154 8 8th TRUE FALSE
  # str(votes)
  # str(eurovision)

Who won?

Question: Who won the Eurovision Song Contest each year? Was there always one winner?

# Who are the winners of the contest each year?   
  winners <- eurovision %>% 
    filter(winner == TRUE & section %in% c("final", "grand-final")) %>% 
    select(year, host_country, section, winner, artist, artist_country, total_points) %>% 
    arrange(year) # %>% 
    # nrow()
  # There are 69 rows in the dataset.

  knitr::kable(winners[1:10,])
year host_country section winner artist artist_country total_points
1956 Switzerland final TRUE Lys Assia Switzerland NA
1957 Germany final TRUE Corry Brokken Netherlands 31
1958 Netherlands final TRUE André Claveau France 27
1959 France final TRUE Teddy Scholten Netherlands 21
1960 United Kingdom final TRUE Jacqueline Boyer France 32
1961 France final TRUE Jean-Claude Pascal Luxembourg 31
1962 Luxembourg final TRUE Isabelle Aubret France 26
1963 United Kingdom final TRUE Grethe and Jørgen Ingmann Denmark 42
1964 Denmark final TRUE Gigliola Cinquetti Italy 49
1965 Italy final TRUE France Gall Luxembourg 32
# Who are the winning countries?
  ggplot(data = winners,
         aes(x = forcats::fct_infreq(artist_country))) +
           geom_bar() +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(title = "Number of wins per country",
         x = "Country",
         y = "Number of wins")

# How many distinct years are present in the dataset?  
  eurovision %>% 
    distinct(year) %>% 
    nrow()
## [1] 67
  # There are 67 distinct years in the dataset.
  
# Are there any years with 0 winners?
    winners %>% 
      distinct(year) %>% 
      pull(year) %>%  # creates a vector from a column
      setdiff(c(1956:2022), .) # get all rows in x that are not in y
## [1] 2020
    # There was no winner in 2020 because the contest had to be stopped due to COVID-19 :(
  
# Are there any years with >1 winner?  
  winners %>% 
    group_by(year) %>% 
    filter(n()>1) %>% 
    knitr::kable(.)
year host_country section winner artist artist_country total_points
1969 Spain final TRUE Frida Boccara France 18
1969 Spain final TRUE Lenny Kuhr Netherlands 18
1969 Spain final TRUE Lulu United Kingdom 18
1969 Spain final TRUE Salomé Spain 18
  # Yes, 1969 had 4 winners.

Points system

Question: How many points are being distributed each year?

# Calculate the total number of points for each year
  points <- eurovision %>% 
    group_by(year) %>% 
    filter(section %in% c("final", "grand-final")) %>% 
    summarize(total_points_year = sum(total_points, na.rm = TRUE))

# Get the number of countries participating by year
  countries <- eurovision %>% 
    group_by(year) %>% 
    summarize(n_countries_year = n_distinct(artist_country))
  
# Join datasets  
  by_year <- full_join(points, countries, by = "year") %>% 
    mutate(points_per_country = total_points_year/n_countries_year)

# Plot total number of points
  ggplot(data = by_year,
         aes(x = year,
             y = total_points_year,
             size = n_countries_year)) +
    geom_point() +
    labs(title = "Eurovision: Total points to distribute each year", 
         x = "Year", 
         y = "Total points", 
         size = "Participating countries")

  ggplot(data = by_year,
         aes(x = year,
             y = points_per_country,
             color = n_countries_year)) +
    geom_point() +
    labs(title = "Eurovision: Points to distribute per country each year", 
         x = "Year", 
         y = "Points to distribute per country", 
         col = "Participating countries")

# Why were no points assigned in 1956?
  eurovision %>% 
    filter(year == 1956) %>% 
    select(event, artist, artist_country, section, total_points, rank, winner) %>% 
    knitr::kable(.)
event artist artist_country section total_points rank winner
Lugano 1956 Tonina Torielli Italy final NA 2 FALSE
Lugano 1956 Michèle Arnaud Luxembourg final NA 2 FALSE
Lugano 1956 Dany Dauberson France final NA 2 FALSE
Lugano 1956 Freddy Quinn Germany final NA 2 FALSE
Lugano 1956 Mony Marc Belgium final NA 2 FALSE
Lugano 1956 Lys Assia Switzerland final NA 1 TRUE
Lugano 1956 Corry Brokken Netherlands final NA 2 FALSE
Lugano 1956 Franca Raimondi Italy final NA 2 FALSE
Lugano 1956 Michèle Arnaud Luxembourg final NA 2 FALSE
Lugano 1956 Mathé Altéry France final NA 2 FALSE
Lugano 1956 Walter Andreas Schwarz Germany final NA 2 FALSE
Lugano 1956 Fud Leclerc Belgium final NA 2 FALSE
Lugano 1956 Lys Assia Switzerland final NA 2 FALSE
Lugano 1956 Jetty Paerl Netherlands final NA 2 FALSE
  # There was no points, only rank = 1 for the winner and rank = 2 for the rest.
  
# Why were no points assigned in 2020?
  eurovision %>% 
    filter(year == 2020) %>% 
    select(event, artist, artist_country, section, total_points, rank, winner) %>% 
    knitr::kable(.)
event artist artist_country section total_points rank winner
Rotterdam 2020 Ana Soklič Slovenia first-semi-final NA NA FALSE
Rotterdam 2020 Ulrikke Norway first-semi-final NA NA FALSE
Rotterdam 2020 Eden Alene Israel first-semi-final NA NA FALSE
Rotterdam 2020 Sandro Cyprus first-semi-final NA NA FALSE
Rotterdam 2020 Efendi Azerbaijan first-semi-final NA NA FALSE
Rotterdam 2020 Go_A Ukraine first-semi-final NA NA FALSE
Rotterdam 2020 Hooverphonic Belgium first-semi-final NA NA FALSE
Rotterdam 2020 Little Big Russia first-semi-final NA NA FALSE
Rotterdam 2020 Vasil North Macedonia first-semi-final NA NA FALSE
Rotterdam 2020 The Roop Lithuania first-semi-final NA NA FALSE
Rotterdam 2020 Montaigne Australia first-semi-final NA NA FALSE
Rotterdam 2020 Lesley Roy Ireland first-semi-final NA NA FALSE
Rotterdam 2020 Damir Kedžo Croatia first-semi-final NA NA FALSE
Rotterdam 2020 The Mamas Sweden first-semi-final NA NA FALSE
Rotterdam 2020 VAL Belarus first-semi-final NA NA FALSE
Rotterdam 2020 Destiny Malta first-semi-final NA NA FALSE
Rotterdam 2020 ROXEN Romania first-semi-final NA NA FALSE
Rotterdam 2020 Benny Cristo Czech Republic second-semi-final NA NA FALSE
Rotterdam 2020 Natalia Gordienko Moldova second-semi-final NA NA FALSE
Rotterdam 2020 Stefania Greece second-semi-final NA NA FALSE
Rotterdam 2020 Elisa Portugal second-semi-final NA NA FALSE
Rotterdam 2020 Daði og Gagnamagnið Iceland second-semi-final NA NA FALSE
Rotterdam 2020 Alicja Poland second-semi-final NA NA FALSE
Rotterdam 2020 Vincent Bueno Austria second-semi-final NA NA FALSE
Rotterdam 2020 Aksel Finland second-semi-final NA NA FALSE
Rotterdam 2020 Hurricane Serbia second-semi-final NA NA FALSE
Rotterdam 2020 Ben & Tan Denmark second-semi-final NA NA FALSE
Rotterdam 2020 Uku Suviste Estonia second-semi-final NA NA FALSE
Rotterdam 2020 Athena Manoukian Armenia second-semi-final NA NA FALSE
Rotterdam 2020 Gjon’s Tears Switzerland second-semi-final NA NA FALSE
Rotterdam 2020 VICTORIA Bulgaria second-semi-final NA NA FALSE
Rotterdam 2020 Samanta Tīna Latvia second-semi-final NA NA FALSE
Rotterdam 2020 Tornike Kipiani Georgia second-semi-final NA NA FALSE
Rotterdam 2020 Arilena Ara Albania second-semi-final NA NA FALSE
Rotterdam 2020 Senhit San Marino second-semi-final NA NA FALSE
Rotterdam 2020 Tom Leeb France grand-final NA NA FALSE
Rotterdam 2020 Blas Cantó Spain grand-final NA NA FALSE
Rotterdam 2020 Jeangu Macrooy Netherlands grand-final NA NA FALSE
Rotterdam 2020 James Newman United Kingdom grand-final NA NA FALSE
Rotterdam 2020 Ben Dolic Germany grand-final NA NA FALSE
Rotterdam 2020 Diodato Italy grand-final NA NA FALSE
  # Eurovision was cancelled and no points were assigned because of COVID-19 :(

What proportion of points did the winner get?

Question: What proportion of the total points distributed that year did the winner of the final get?

# What proportion of points did the winners get?
  winners_points <- left_join(winners, by_year, by = "year") %>% 
    mutate(total_points_prop = round((total_points / total_points_year) * 100, 1))
  
# Plot results
  ggplot(data = winners_points, aes(x=year, y=total_points_prop, fill = artist_country)) +
    geom_bar(stat = "identity") +
    labs(title = "Percent of points received by Eurovision winner by year", 
         x = "Year", 
         y = "Points received by winner (%)", 
         fill = "Country of winner")

Flow of points from country to country

Question: Which countries are sending votes to each other?

# Create from-to variable
  votes$from_to <- paste(votes$from_country, votes$to_country)

# Get the sum of votes
  sum_votes <- votes %>%
    filter(is.na(duplicate)) %>%
    select(from_to, points) %>%
    group_by(from_to) %>%
    summarise(sum_points = sum(points)) %>%
    separate(from_to, c("From", "To"))
  
# Plot flow of points within countries that received at least 250 points
  ggplot(data = filter(sum_votes, sum_points > 250),
         aes(x = From,
             y = To)) +
    geom_raster(aes(fill = sum_points)) +
    scale_fill_viridis_c(direction = -1) +
    labs(title = "Eurovision point distribution over the years 1956 - 2022 (>250 points)",
         x = "From country",
         y = "To country",
         fill = "Total points \n from country \n to country") +
    theme(axis.text.x = element_text(angle = 90))

# Plot flow of points within countries that received at least 400 points
  ggplot(data = filter(sum_votes, sum_points > 400),
         aes(x = From,
             y = To)) +
    geom_raster(aes(fill = sum_points)) +
    scale_fill_viridis_c(direction = -1) +
    labs(title = "Eurovision point distribution over the years 1956 - 2022 (>400 points)",
         x = "From country",
         y = "To country",
         fill = "Total points \n from country \n to country") +
    theme(axis.text.x = element_text(angle = 90))