R Markdown

library("ggplot2")
library("reshape2")
library("dplyr")
library("extrafont")
library(RColorBrewer)
library(tidyverse)
library(ggpubr)
# Download the TidyTuesday release dated 2018-06-26.
data <- tidytuesdayR::tt_load("2018-06-26")
## 
##  Downloading file 1 of 1: `week13_alcohol_global.csv`

# Add the combined serving count per country and each beverage's share of
# that combined count (the three shares sum to 1 whenever the total is > 0).
alcohol <- data$week13_alcohol_global %>%
  mutate(
    total_servings = beer_servings + wine_servings + spirit_servings,
    beer = beer_servings / total_servings,
    spirits = spirit_servings / total_servings,
    wine = wine_servings / total_servings
  )

# Countries with the highest total pure-alcohol consumption, reshaped to one
# row per (country, beverage type) so the bars can be stacked by beverage.
# NOTE: the object keeps its historical name `top15`, but it holds the top 20
# countries (slice_max() keeps ties by default, so it may return more rows).
top15 <- alcohol %>%
  slice_max(total_litres_of_pure_alcohol, n = 20) %>%
  select(country, total_litres_of_pure_alcohol, beer, spirits, wine) %>%
  pivot_longer(c(beer, spirits, wine), names_to = "type", values_to = "prop")

# Stacked bars: each segment is the beverage's share of servings scaled by the
# country's total litres, so the full bar height equals the total litres.
ggplot(
  top15,
  aes(
    x = reorder(country, -total_litres_of_pure_alcohol),
    y = prop * total_litres_of_pure_alcohol,
    fill = type
  )
) +
  geom_bar(stat = "identity", width = 0.8, position = "stack") +
  scale_fill_manual(values = c("#6FB5EC", "#217E51", "#FF7D00")) +
  xlab("Country") +
  ylab("Total_litres_of_pure_alcohol") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ggtitle("Top 20 countries of total litres of pure alcohol")

# European countries. The list contains both the map-style spellings ("UK",
# "Russia", "Bosnia and Herzegovina") and the spellings used by the alcohol
# dataset ("United Kingdom", "Russian Federation", "Bosnia-Herzegovina"):
# at this point `alcohol$country` has not been renamed yet, so without the
# dataset spellings those countries would be labelled 'non-europe'.
europe <- c(
  "Albania", "Finland", "Andorra", "Austria", "Belgium", "Bulgaria",
  "Bosnia and Herzegovina", "Bosnia-Herzegovina", "Belarus", "Switzerland",
  "Czech Republic", "Cyprus", "Germany", "Denmark", "Canary Islands", "Spain",
  "Estonia", "France", "UK", "United Kingdom", "Greece", "Croatia", "Hungary",
  "Ireland", "Iceland", "Italy", "San Marino", "Kosovo", "Liechtenstein",
  "Lithuania", "Luxembourg", "Latvia", "Monaco", "Moldova", "Macedonia",
  "Malta", "Montenegro", "Netherlands", "Norway", "Poland", "Portugal",
  "Romania", "Russia", "Russian Federation", "Serbia", "Slovakia", "Slovenia",
  "Sweden", "Turkey", "Ukraine", "Vatican"
)

# Flag each country as europe / non-europe and reshape to one row per
# (country, beverage type) holding the number of servings.
alcohol_europe <- alcohol %>%
  mutate(europe_or_not = ifelse(country %in% europe, "europe", "non-europe")) %>%
  pivot_longer(
    c(beer_servings, spirit_servings, wine_servings),
    names_to = "type",
    values_to = "value"
  )

# Box plots of servings for europe vs non-europe, one panel per beverage,
# annotated with the p-value of a t-test between the two groups.
alcohol_europe %>%
  ggplot(aes(europe_or_not, value, fill = type)) +
  scale_fill_manual(values = c("#6FB5EC", "#217E51", "#FF7D00")) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~type) +
  stat_compare_means(aes(group = europe_or_not), method = "t.test", label = "p")

# Start again from the raw data (already downloaded into `data` above).
#data <- tidytuesdayR::tt_load('2018-06-26')
alcohol <- data$week13_alcohol_global

# Alter names in data to match those in map.world.
# The lookup is applied with a plain index + fallback, so a spelling that is
# absent from the data is simply ignored instead of lengthening the vector.
map_names <- c(
  "Antigua & Barbuda" = "Antigua",
  "Bosnia-Herzegovina" = "Bosnia and Herzegovina",
  "Cote d'Ivoire" = "Ivory Coast",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of Congo",
  "DR Congo" = "Democratic Republic of the Congo",
  "Russian Federation" = "Russia",
  "United Kingdom" = "UK"
)
original_names <- as.character(alcohol$country)
replacement <- unname(map_names[original_names])
alcohol$country <- ifelse(is.na(replacement), original_names, replacement)

# Rank countries by beer servings: `main_type` is 1 for the largest value.
alcohol <- alcohol %>%
  arrange(desc(beer_servings)) %>%
  mutate(main_type = row_number())

head(alcohol, n = 20)
## # A tibble: 20 × 6
##    country        beer_servings spirit_servings wine_servings total_li…¹ main_…²
##    <chr>                  <dbl>           <dbl>         <dbl>      <dbl>   <int>
##  1 Namibia                  376               3             1        6.8       1
##  2 Czech Republic           361             170           134       11.8       2
##  3 Gabon                    347              98            59        8.9       3
##  4 Germany                  346             117           175       11.3       4
##  5 Lithuania                343             244            56       12.9       5
##  6 Poland                   343             215            56       10.9       6
##  7 Venezuela                333             100             3        7.7       7
##  8 Ireland                  313             118           165       11.4       8
##  9 Palau                    306              63            23        6.9       9
## 10 Romania                  297             122           167       10.4      10
## 11 Belgium                  295              84           212       10.5      11
## 12 Panama                   285             104            18        7.2      12
## 13 Spain                    284             157           112       10        13
## 14 Serbia                   283             131           127        9.6      14
## 15 Latvia                   281             216            62       10.5      15
## 16 Austria                  279              75           191        9.7      16
## 17 Slovenia                 270              51           276       10.6      17
## 18 Belize                   263             114             8        6.8      18
## 19 Finland                  263             133            97       10        19
## 20 Australia                261              72           212       10.4      20
## # … with abbreviated variable names ¹​total_litres_of_pure_alcohol, ²​main_type
# Get world map
map.world <- map_data(map = "world")

# Join alcohol data with map data; regions without a match get NA values.
map.world <- left_join(map.world, alcohol, by = c("region" = "country"))
map.world$main_type <- factor(map.world$main_type)

# One colour per rank: ranks 1-20 run along a Set1 red -> orange ramp, every
# other rank along a white -> gray ramp. The vector is NAMED by rank so that
# ggplot matches colours to factor levels by name. With an unnamed vector the
# colours are matched by position, and any rank whose country is missing from
# the map would shift all later ranks onto the wrong colour.
n_ranks <- nrow(alcohol)
n_top <- min(20L, n_ranks)
set1 <- brewer.pal(9, "Set1")
rank_colours <- c(
  colorRampPalette(c(set1[1], set1[5]), bias = 1)(n_top),
  colorRampPalette(c("white", "gray"), bias = 10)(n_ranks - n_top)
)
names(rank_colours) <- seq_len(n_ranks)

# Plot - world map coloured by each country's rank (top 20 highlighted);
# regions without data use the Set1 gray.
preferred_plot <- ggplot() +
  geom_polygon(
    data = map.world,
    aes(x = long, y = lat, group = group, fill = main_type),
    color = "white",
    size = 1
  ) +
  scale_fill_manual(values = rank_colours, na.value = set1[9]) +
  theme(legend.position = "none") +
  labs(title = "\ntop 20 rank around the world", subtitle = "beer\n") +
  #guides(fill = guide_legend(override.aes=list(size=3))) +
  theme(
    text = element_text(family = "Verdana"),
    plot.title = element_text(hjust = 0.5, size = 25),
    plot.subtitle = element_text(hjust = 0.5, size = 16),
    #legend.position = "top",
    #legend.text=element_text(size=16),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

print(preferred_plot)

## # A tibble: 20 × 6
##    country                      beer_servings spirit_s…¹ wine_…² total…³ main_…⁴
##    <chr>                                <dbl>      <dbl>   <dbl>   <dbl>   <int>
##  1 Grenada                                199        438      28    11.9       1
##  2 Belarus                                142        373      42    14.4       2
##  3 Haiti                                    1        326       1     5.9       3
##  4 Russia                                 247        326      73    11.5       4
##  5 St. Lucia                              171        315      71    10.1       5
##  6 Guyana                                  93        302       1     7.1       6
##  7 Slovakia                               196        293     116    11.4       7
##  8 Dominica                                52        286      26     6.6       8
##  9 Thailand                                99        258       1     6.4       9
## 10 Cook Islands                             0        254      74     5.9      10
## 11 Bulgaria                               231        252      94    10.3      11
## 12 Kazakhstan                             124        246      12     6.8      12
## 13 Lithuania                              343        244      56    12.9      13
## 14 Ukraine                                206        237      45     8.9      14
## 15 Moldova                                109        226      18     6.3      15
## 16 St. Vincent & the Grenadines           120        221      11     6.3      16
## 17 Latvia                                 281        216      62    10.5      17
## 18 Hungary                                234        215     185    11.3      18
## 19 Poland                                 343        215      56    10.9      19
## 20 St. Kitts & Nevis                      194        205      32     7.7      20
## # … with abbreviated variable names ¹​spirit_servings, ²​wine_servings,
## #   ³​total_litres_of_pure_alcohol, ⁴​main_type

## # A tibble: 20 × 6
##    country           beer_servings spirit_servings wine_servings total…¹ main_…²
##    <chr>                     <dbl>           <dbl>         <dbl>   <dbl>   <int>
##  1 France                      127             151           370    11.8       1
##  2 Portugal                    194              67           339    11         2
##  3 Andorra                     245             138           312    12.4       3
##  4 Switzerland                 185             100           280    10.2       4
##  5 Denmark                     224              81           278    10.4       5
##  6 Slovenia                    270              51           276    10.6       6
##  7 Luxembourg                  236             133           271    11.4       7
##  8 Croatia                     230              87           254    10.2       8
##  9 Italy                        85              42           237     6.5       9
## 10 Equatorial Guinea            92               0           233     5.8      10
## 11 Argentina                   193              25           221     8.3      11
## 12 Uruguay                     115              35           220     6.6      12
## 13 Greece                      133             112           218     8.3      13
## 14 Australia                   261              72           212    10.4      14
## 15 Belgium                     295              84           212    10.5      15
## 16 UK                          219             126           195    10.4      16
## 17 Austria                     279              75           191     9.7      17
## 18 Netherlands                 251              88           190     9.4      18
## 19 Sweden                      152              60           186     7.2      19
## 20 Hungary                     234             215           185    11.3      20
## # … with abbreviated variable names ¹​total_litres_of_pure_alcohol, ²​main_type

# ---- Alcohol consumption along longitude and latitude ----
# Both plots share the same preparation and layout, so the logic lives in two
# helpers that are called once per coordinate.

alcohol <- data$week13_alcohol_global
map.world <- map_data(map = "world")

# Dataset spellings -> region names used by the world map. Without this
# mapping these countries find no match in the join and are dropped.
map_region_names <- c(
  "Antigua & Barbuda" = "Antigua",
  "Bosnia-Herzegovina" = "Bosnia and Herzegovina",
  "Cote d'Ivoire" = "Ivory Coast",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of Congo",
  "DR Congo" = "Democratic Republic of the Congo",
  "Russian Federation" = "Russia",
  "United Kingdom" = "UK"
)

# Build a long table with one row per (region, alcohol measure).
#   alcohol_df: alcohol data with a `country` column plus numeric measures.
#   map_df:     map polygons with `region`, `long` and `lat` columns.
#   coord:      name of the map column to average per region ("long"/"lat").
# Returns an ungrouped tibble with columns region, coord_m (mean coordinate of
# the region's polygon points), alcohol (factor), value and avg_value (mean of
# `value` within each alcohol measure). Regions without alcohol data are
# removed.
alcohol_by_coordinate <- function(alcohol_df, map_df, coord,
                                  name_map = map_region_names) {
  region_centres <- map_df %>%
    group_by(region) %>%
    summarise(coord_m = mean(.data[[coord]]), .groups = "drop")

  renamed <- alcohol_df %>%
    mutate(country = coalesce(unname(name_map[country]), country))

  region_centres %>%
    left_join(renamed, by = c("region" = "country")) %>%
    arrange(coord_m) %>%
    drop_na() %>%
    pivot_longer(
      !c(region, coord_m),
      names_to = "alcohol",
      values_to = "value"
    ) %>%
    group_by(alcohol) %>%
    mutate(avg_value = mean(value)) %>%
    ungroup() %>%
    mutate(
      alcohol = factor(
        alcohol,
        levels = c(
          "beer_servings",
          "spirit_servings",
          "wine_servings",
          "total_litres_of_pure_alcohol"
        )
      )
    )
}

# Draw one facet per alcohol measure: values against the regions' mean
# coordinate, with a dashed horizontal line at the measure's overall mean.
plot_alcohol_profile <- function(profile_df, plot_title, x_label) {
  ggplot(profile_df, aes(x = coord_m, y = value)) +
    facet_grid(alcohol ~ ., scales = "free") +
    geom_hline(
      aes(yintercept = avg_value, color = alcohol),
      linetype = "dashed",
      size = 1.3,
      alpha = 0.8
    ) +
    geom_line(aes(color = alcohol)) +
    geom_point(size = 1, aes(color = alcohol), alpha = 0.6) +
    scale_color_manual(
      values = c(
        "beer_servings" = "#6FB5EC",
        "spirit_servings" = "#217E51",
        "wine_servings" = "#FF7D00",
        "total_litres_of_pure_alcohol" = "#9A0D20"
      )
    ) +
    theme_minimal() +
    theme(
      legend.position = "top",
      panel.grid.major.x = element_line(size = 1.1),
      plot.background = element_rect(fill = "#FFF8E7", color = "transparent")
    ) +
    labs(title = plot_title) +
    xlab(x_label)
}

# Consumption against mean longitude of each country.
dedup_df <- alcohol_by_coordinate(alcohol, map.world, "long")
p <- plot_alcohol_profile(dedup_df, "Alcohol and longitude", "long")
p

# Consumption against mean latitude of each country.
dedup_df <- alcohol_by_coordinate(alcohol, map.world, "lat")
p1 <- plot_alcohol_profile(dedup_df, "Alcohol and latitude", "lat")
p1