R Markdown
library("ggplot2")
library("reshape2")
library("dplyr")
library("extrafont")
library(RColorBrewer)
library(tidyverse)
library(ggpubr)
# Load the TidyTuesday 2018-06-26 release (global alcohol consumption).
data <- tidytuesdayR::tt_load("2018-06-26")
##
## Downloading file 1 of 1: `week13_alcohol_global.csv`
alcohol <- data$week13_alcohol_global

# Express each beverage as a share of the country's total servings.
# Any country whose three serving counts sum to zero gets NaN shares (0 / 0);
# that does not affect the plot below as long as the top consumers have
# non-zero servings.
alcohol <- alcohol %>%
  mutate(
    total_servings = beer_servings + wine_servings + spirit_servings,
    beer = beer_servings / total_servings,
    spirits = spirit_servings / total_servings,
    wine = wine_servings / total_servings
  )

# Top 20 countries by pure-alcohol consumption, one row per country/beverage.
# NOTE: the object is called `top15` for historical reasons but holds the
# top 20 (slice_max keeps ties, so it can hold slightly more).
top15 <- alcohol %>%
  slice_max(total_litres_of_pure_alcohol, n = 20) %>%
  select(country, total_litres_of_pure_alcohol, beer, spirits, wine) %>%
  pivot_longer(c(beer, spirits, wine), names_to = "type", values_to = "prop")

# Stacked bars: bar height is the country's total litres, split between the
# beverage types according to their share of servings.
ggplot(
  top15,
  aes(
    x = reorder(country, -total_litres_of_pure_alcohol),
    y = prop * total_litres_of_pure_alcohol,
    fill = type
  )
) +
  geom_bar(stat = "identity", width = 0.8, position = "stack") +
  scale_fill_manual(values = c("#6FB5EC", "#217E51", "#FF7D00")) +
  xlab("Country") +
  ylab("Total_litres_of_pure_alcohol") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ggtitle("Top 20 countries of total litres of pure alcohol")
# European countries, spelled the way the world-map data spells them.
europe <- c(
  "Albania", "Finland", "Andorra", "Austria", "Belgium", "Bulgaria",
  "Bosnia and Herzegovina", "Belarus", "Switzerland", "Czech Republic",
  "Cyprus", "Germany", "Denmark", "Canary Islands", "Spain", "Estonia",
  "France", "UK", "Greece", "Croatia", "Hungary", "Ireland", "Iceland",
  "Italy", "San Marino", "Kosovo", "Liechtenstein", "Lithuania",
  "Luxembourg", "Latvia", "Monaco", "Moldova", "Macedonia", "Malta",
  "Montenegro", "Netherlands", "Norway", "Poland", "Portugal", "Romania",
  "Russia", "Serbia", "Slovakia", "Slovenia", "Sweden", "Turkey", "Ukraine",
  "Vatican"
)

# At this point `alcohol` still carries the raw dataset spellings (they are
# only harmonised with the map names further down the file), so the three
# European countries whose raw spelling differs must be matched explicitly;
# otherwise they are silently counted as non-European.
europe_raw_spellings <- c(
  "Bosnia-Herzegovina", "Russian Federation", "United Kingdom"
)

# Flag each country and reshape to one row per country/beverage type.
alcohol_europe <- alcohol %>%
  mutate(
    europe_or_not = ifelse(
      country %in% c(europe, europe_raw_spellings),
      "europe",
      "non-europe"
    )
  ) %>%
  pivot_longer(
    c(beer_servings, spirit_servings, wine_servings),
    names_to = "type",
    values_to = "value"
  )

# Box plots of servings, Europe vs. rest of the world, one facet per beverage,
# annotated with the p-value of a t-test between the two groups.
alcohol_europe %>%
  ggplot(aes(europe_or_not, value, fill = type)) +
  scale_fill_manual(values = c("#6FB5EC", "#217E51", "#FF7D00")) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~type) +
  stat_compare_means(aes(group = europe_or_not), method = "t.test", label = "p")
# Start again from the raw table (the dataset was already downloaded into
# `data` above, so it is not fetched a second time).
alcohol <- data$week13_alcohol_global

# Alter names in data to match those in map.world.
# `m` maps each original country name (as vector name) to the spelling used
# by the map data (as value).
m <- as.character(alcohol$country)
names(m) <- m
map_spellings <- c(
  "Antigua & Barbuda" = "Antigua",
  "Bosnia-Herzegovina" = "Bosnia and Herzegovina",
  "Cote d'Ivoire" = "Ivory Coast",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of Congo",
  "DR Congo" = "Democratic Republic of the Congo",
  "Russian Federation" = "Russia",
  "United Kingdom" = "UK"
)
# Only touch names that actually occur: assigning to a missing name would
# append a new element and make `m` longer than the table.
present <- intersect(names(map_spellings), names(m))
m[present] <- map_spellings[present]
alcohol$country <- unname(m)

# Rank countries by beer servings: main_type == 1 is the largest consumer.
# Ties keep their existing relative order.
alcohol <- alcohol %>%
  arrange(desc(beer_servings)) %>%
  mutate(main_type = row_number())
head(alcohol, n = 20)
## # A tibble: 20 × 6
## country beer_servings spirit_servings wine_servings total_li…¹ main_…²
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 Namibia 376 3 1 6.8 1
## 2 Czech Republic 361 170 134 11.8 2
## 3 Gabon 347 98 59 8.9 3
## 4 Germany 346 117 175 11.3 4
## 5 Lithuania 343 244 56 12.9 5
## 6 Poland 343 215 56 10.9 6
## 7 Venezuela 333 100 3 7.7 7
## 8 Ireland 313 118 165 11.4 8
## 9 Palau 306 63 23 6.9 9
## 10 Romania 297 122 167 10.4 10
## 11 Belgium 295 84 212 10.5 11
## 12 Panama 285 104 18 7.2 12
## 13 Spain 284 157 112 10 13
## 14 Serbia 283 131 127 9.6 14
## 15 Latvia 281 216 62 10.5 15
## 16 Austria 279 75 191 9.7 16
## 17 Slovenia 270 51 276 10.6 17
## 18 Belize 263 114 8 6.8 18
## 19 Finland 263 133 97 10 19
## 20 Australia 261 72 212 10.4 20
## # … with abbreviated variable names ¹total_litres_of_pure_alcohol, ²main_type
# Get world map
map.world <- map_data(map = "world")
# Join alcohol data with map data; regions without alcohol data get NA.
map.world <- left_join(map.world, alcohol, by = c("region" = "country"))
map.world$main_type <- factor(map.world$main_type)

# One fill colour per rank: a red-to-orange ramp for the top 20 and a
# white-to-gray ramp for everyone else.
# The vector is named by rank so ggplot matches colours to ranks by name.
# Unnamed values are matched by position to the factor levels that survive
# the join, so any ranked country missing from the map would shift every
# later colour by one.
n_ranked <- nrow(alcohol)
n_highlight <- 20
set1 <- brewer.pal(9, "Set1")
rank_colours <- c(
  colorRampPalette(c(set1[1], set1[5]), bias = 1)(n_highlight),
  colorRampPalette(c("white", "gray"), bias = 10)(n_ranked - n_highlight)
)
names(rank_colours) <- as.character(seq_len(n_ranked))

# Plot - world map shaded by each country's beer-servings rank.
preferred_plot <- ggplot() +
  geom_polygon(
    data = map.world,
    aes(x = long, y = lat, group = group, fill = main_type),
    color = "white",
    size = 1
  ) +
  scale_fill_manual(values = rank_colours, na.value = set1[9]) +
  theme(legend.position = "none") +
  labs(title = "\ntop 20 rank around the world", subtitle = "beer\n") +
  theme(
    text = element_text(family = "Verdana"),
    plot.title = element_text(hjust = 0.5, size = 25),
    plot.subtitle = element_text(hjust = 0.5, size = 16),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )
print(preferred_plot)
## # A tibble: 20 × 6
## country beer_servings spirit_s…¹ wine_…² total…³ main_…⁴
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 Grenada 199 438 28 11.9 1
## 2 Belarus 142 373 42 14.4 2
## 3 Haiti 1 326 1 5.9 3
## 4 Russia 247 326 73 11.5 4
## 5 St. Lucia 171 315 71 10.1 5
## 6 Guyana 93 302 1 7.1 6
## 7 Slovakia 196 293 116 11.4 7
## 8 Dominica 52 286 26 6.6 8
## 9 Thailand 99 258 1 6.4 9
## 10 Cook Islands 0 254 74 5.9 10
## 11 Bulgaria 231 252 94 10.3 11
## 12 Kazakhstan 124 246 12 6.8 12
## 13 Lithuania 343 244 56 12.9 13
## 14 Ukraine 206 237 45 8.9 14
## 15 Moldova 109 226 18 6.3 15
## 16 St. Vincent & the Grenadines 120 221 11 6.3 16
## 17 Latvia 281 216 62 10.5 17
## 18 Hungary 234 215 185 11.3 18
## 19 Poland 343 215 56 10.9 19
## 20 St. Kitts & Nevis 194 205 32 7.7 20
## # … with abbreviated variable names ¹spirit_servings, ²wine_servings,
## # ³total_litres_of_pure_alcohol, ⁴main_type
## # A tibble: 20 × 6
## country beer_servings spirit_servings wine_servings total…¹ main_…²
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 France 127 151 370 11.8 1
## 2 Portugal 194 67 339 11 2
## 3 Andorra 245 138 312 12.4 3
## 4 Switzerland 185 100 280 10.2 4
## 5 Denmark 224 81 278 10.4 5
## 6 Slovenia 270 51 276 10.6 6
## 7 Luxembourg 236 133 271 11.4 7
## 8 Croatia 230 87 254 10.2 8
## 9 Italy 85 42 237 6.5 9
## 10 Equatorial Guinea 92 0 233 5.8 10
## 11 Argentina 193 25 221 8.3 11
## 12 Uruguay 115 35 220 6.6 12
## 13 Greece 133 112 218 8.3 13
## 14 Australia 261 72 212 10.4 14
## 15 Belgium 295 84 212 10.5 15
## 16 UK 219 126 195 10.4 16
## 17 Austria 279 75 191 9.7 17
## 18 Netherlands 251 88 190 9.4 18
## 19 Sweden 152 60 186 7.2 19
## 20 Hungary 234 215 185 11.3 20
## # … with abbreviated variable names ¹total_litres_of_pure_alcohol, ²main_type
# Alcohol consumption against longitude: one facet per measure, countries
# placed at the mean longitude of their map outline.
alcohol <- data$week13_alcohol_global

# The raw table and the map data spell some countries differently. Harmonise
# them first, otherwise those countries find no match in the join below and
# are silently removed by na.omit().
country_renames <- c(
  "Antigua & Barbuda" = "Antigua",
  "Bosnia-Herzegovina" = "Bosnia and Herzegovina",
  "Cote d'Ivoire" = "Ivory Coast",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of Congo",
  "DR Congo" = "Democratic Republic of the Congo",
  "Russian Federation" = "Russia",
  "United Kingdom" = "UK"
)
needs_rename <- alcohol$country %in% names(country_renames)
alcohol$country[needs_rename] <-
  unname(country_renames[as.character(alcohol$country[needs_rename])])

map.world <- map_data(map = "world")

# Mean longitude of all outline points of each region.
df_plot_long <- map.world %>%
  group_by(region) %>%
  mutate(long_m = mean(long)) %>%
  ungroup()

# One row per region, joined with the alcohol data; regions without alcohol
# data are dropped.
dedup_df <- df_plot_long %>%
  distinct(region, long_m) %>%
  left_join(alcohol, by = c("region" = "country")) %>%
  arrange(long_m) %>%
  na.omit()

# Long format (one row per region/measure) plus the overall mean of each
# measure, drawn below as a dashed reference line.
dedup_df <- dedup_df %>%
  tidyr::pivot_longer(
    !c(region, long_m),
    names_to = "alcohol",
    values_to = "value"
  ) %>%
  group_by(alcohol) %>%
  mutate(mean_value = mean(value)) %>%
  ungroup()

# Fix the facet order.
dedup_df$alcohol <- factor(
  dedup_df$alcohol,
  levels = c(
    "beer_servings",
    "spirit_servings",
    "wine_servings",
    "total_litres_of_pure_alcohol"
  )
)

p <- ggplot(dedup_df, aes(x = long_m, y = value)) +
  facet_grid(alcohol ~ ., scales = "free") +
  geom_hline(
    aes(yintercept = mean_value, color = alcohol),
    linetype = "dashed",
    size = 1.3,
    alpha = 0.8
  ) +
  geom_line(aes(color = alcohol)) +
  geom_point(size = 1, aes(color = alcohol), alpha = 0.6) +
  scale_color_manual(
    values = c(
      "beer_servings" = "#6FB5EC",
      "spirit_servings" = "#217E51",
      "wine_servings" = "#FF7D00",
      "total_litres_of_pure_alcohol" = "#9A0D20"
    )
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    panel.grid.major.x = element_line(size = 1.1),
    plot.background = element_rect(fill = "#FFF8E7", color = "transparent")
  ) +
  labs(title = "Alcohol and longitude") +
  xlab("long")
p
# Alcohol consumption against latitude: one facet per measure, countries
# placed at the mean latitude of their map outline.
alcohol <- data$week13_alcohol_global

# The raw table and the map data spell some countries differently. Harmonise
# them first, otherwise those countries find no match in the join below and
# are silently removed by na.omit().
country_renames <- c(
  "Antigua & Barbuda" = "Antigua",
  "Bosnia-Herzegovina" = "Bosnia and Herzegovina",
  "Cote d'Ivoire" = "Ivory Coast",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of Congo",
  "DR Congo" = "Democratic Republic of the Congo",
  "Russian Federation" = "Russia",
  "United Kingdom" = "UK"
)
needs_rename <- alcohol$country %in% names(country_renames)
alcohol$country[needs_rename] <-
  unname(country_renames[as.character(alcohol$country[needs_rename])])

map.world <- map_data(map = "world")

# Mean latitude of all outline points of each region.
df_plot_long <- map.world %>%
  group_by(region) %>%
  mutate(lat_m = mean(lat)) %>%
  ungroup()

# One row per region, joined with the alcohol data; regions without alcohol
# data are dropped.
dedup_df <- df_plot_long %>%
  distinct(region, lat_m) %>%
  left_join(alcohol, by = c("region" = "country")) %>%
  arrange(lat_m) %>%
  na.omit()

# Long format (one row per region/measure) plus the overall mean of each
# measure, drawn below as a dashed reference line.
dedup_df <- dedup_df %>%
  tidyr::pivot_longer(
    !c(region, lat_m),
    names_to = "alcohol",
    values_to = "value"
  ) %>%
  group_by(alcohol) %>%
  mutate(mean_value = mean(value)) %>%
  ungroup()

# Fix the facet order.
dedup_df$alcohol <- factor(
  dedup_df$alcohol,
  levels = c(
    "beer_servings",
    "spirit_servings",
    "wine_servings",
    "total_litres_of_pure_alcohol"
  )
)

p1 <- ggplot(dedup_df, aes(x = lat_m, y = value)) +
  facet_grid(alcohol ~ ., scales = "free") +
  geom_hline(
    aes(yintercept = mean_value, color = alcohol),
    linetype = "dashed",
    size = 1.3,
    alpha = 0.8
  ) +
  geom_line(aes(color = alcohol)) +
  geom_point(size = 1, aes(color = alcohol), alpha = 0.6) +
  scale_color_manual(
    values = c(
      "beer_servings" = "#6FB5EC",
      "spirit_servings" = "#217E51",
      "wine_servings" = "#FF7D00",
      "total_litres_of_pure_alcohol" = "#9A0D20"
    )
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    panel.grid.major.x = element_line(size = 1.1),
    plot.background = element_rect(fill = "#FFF8E7", color = "transparent")
  ) +
  labs(title = "Alcohol and latitude") +
  xlab("lat")
p1