2022-09-23

LEGO

Trends in LEGO production over time.

library(tidyverse)
library(ggrepel)
library(gganimate)

LEGO sets since 1949

The code for the plot

sets %>% 
  group_by(year) %>% 
  summarise(Counts = n()) %>% 
  mutate(Cumulative = cumsum(Counts)) %>% 
  filter(year != 2022) %>% 
  pivot_longer(-year) %>% 
  ggplot(aes(year,value)) +
  theme_classic(14) +
  th +
  geom_point(size = 3, color="#E3000B") +
  geom_line(color="#E3000B") +
  labs(title = "LEGO set from 1949 to 2021",
       x = "Year",
       y = "Sets") +
  facet_wrap(~name, scales = "free")

dataset used

## # A tibble: 15 × 3
##       id name                   parent_id
##    <dbl> <chr>                      <dbl>
##  1   512 Castle                       507
##  2   662 Princess Castle              504
##  3   566 The Lord of the Rings        561
##  4    56 Construction                  52
##  5   141 Space Police III             126
##  6   556 Series 16 Minifigures        535
##  7   269 Cars                          NA
##  8   534 Universal Building Set       507
##  9   717 Speed Racer                   NA
## 10   191 Dark Forest                  186
## 11   415 Coast Guard                  411
## 12   614 Jungle                        52
## 13   626 Bob the Builder              504
## 14   507 Educational and Dacta         NA
## 15    60 Hospital                      52

dataset used

## # A tibble: 15 × 6
##    set_num  name                                    year theme…¹ num_p…² img_url
##    <chr>    <chr>                                  <dbl>   <dbl>   <dbl> <chr>  
##  1 1450-1   Small Bucket                            2000     505      50 https:…
##  2 7779-1   The Batman Dragster: Catwoman Pursuit   2006     697      93 https:…
##  3 31091-1  Shuttle Transporter                     2019     672     341 https:…
##  4 21115-1  The First Night                         2014     577     408 https:…
##  5 5980-1   Squidman's Pitstop                      2009     141     394 https:…
##  6 8304-1   Smokin' Slickster                       2011     112      44 https:…
##  7 71017-11 Red Hood                                2017     609       9 https:…
##  8 1462-1   Galactic Scout                          1992     129      23 https:…
##  9 66665-1  Bundle Pack                             2021     688       0 https:…
## 10 852717-1 Irina Spalko Key Chain                  2009     503       0 https:…
## 11 820-1    Red Plates Parts Pack                   1980     473      34 https:…
## 12 5124-1   Wheels and Bearings                     1994     443      28 https:…
## 13 3386-1   Xtreme Stunts Pepper Roni Chupa Chups…  2003     407       4 https:…
## 14 2620-1   Sports Car                              1980     632       3 https:…
## 15 40458-1  LEGO House Chef                         2021     599       7 https:…
## # … with abbreviated variable names ¹​theme_id, ²​num_parts

The 15 biggest LEGO themes

The code for the plot

sets_themes <- left_join(sets, themes, by = c("theme_id" = "id"))

big_themes <- sets_themes %>%
    mutate(parent_id = ifelse(is.na(parent_id), theme_id, parent_id)) %>%
    filter(parent_id != 501) %>% group_by(parent_id) %>%
    summarise(count = n()) %>% arrange(desc(count)) %>% slice(1:15) %>%
    left_join(select(themes, id, name), by = c("parent_id" = "id"))

sets_per_theme_per_year <- sets_themes %>%
    mutate(parent_id = ifelse(is.na(parent_id), theme_id, parent_id)) %>%
    group_by(year, parent_id) %>% 
    filter(parent_id %in% big_themes$parent_id) %>%
    summarise(count = n(), .groups = "drop") %>%
    left_join(select(themes, id, name), by = c("parent_id" = "id")) %>%
    select(-c(parent_id)) %>% group_by(name) %>%
    mutate(sums = cumsum(count)) %>% filter(year != 2022) %>%
    group_by(name) %>%
    mutate(name_group = ifelse(sums == max(sums), name, NA))

The code for the plot 2

ggplot(data = sets_per_theme_per_year, aes(x = year, y = sums, color = name))+
    geom_point()+
    theme_classic(14)+
    geom_line()+
    labs(title = "Biggest themes development from 1949 - 2021",
         x = "Year",y = "Sets", color = "Theme")+
    geom_text_repel(
      aes(label = ifelse(is.na(sets_per_theme_per_year$name_group),
                              "", sets_per_theme_per_year$name_group)),
                    box.padding = 0.35,
                    point.padding = 0.1,
                    nudge_x = 2,
                    nudge_y = 100,
                    segment.color = "black",
                    parse = F,
                    max.overlaps = 100)+
    scale_x_continuous(limits = c(1945, 2030))+
    scale_color_viridis_d(option = "turbo")+
    guides(color = "none")+ th

The 15 largest themes per year

The code for the plot

animation <- ggplot(sets_per_theme_per_year,aes(name,count, fill=name)) +
  geom_bar(stat = "identity") +
  coord_flip()+
  guides(fill="none")+
  theme_classic(10)+
  th+
  scale_color_viridis_d(option = "turbo")+
  transition_states(year, transition_length = 1, state_length = 1) +
  labs(title = "Number of produced sets, Year: {closest_state}",
       x = "Set names", y = "Number of sets per year") +
  ease_aes()

animate(animation, fps = 4)

Forecast