Baby Names is a dataset of baby names for the USA and New Zealand. For each year from 1880 to 2017, it holds the number of children of each sex given each name.
# Inspect the column names, types and first values of `babynames`.
str(babynames)
## spec_tbl_df [1,924,665 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ year: num [1:1924665] 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
## $ sex : chr [1:1924665] "F" "F" "F" "F" ...
## $ name: chr [1:1924665] "Mary" "Anna" "Emma" "Elizabeth" ...
## $ n : num [1:1924665] 7065 2604 2003 1939 1746 ...
## $ prop: num [1:1924665] 0.0724 0.0267 0.0205 0.0199 0.0179 ...
## - attr(*, "spec")=
## .. cols(
## .. year = col_double(),
## .. sex = col_character(),
## .. name = col_character(),
## .. n = col_double(),
## .. prop = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
I added a new variable holding the initial letter of each name in order to plot the proportion of initial letters.
# Overlay two series on a shared year axis:
#   * bars: number of rows of `babynames` falling in each year bin, in thousands;
#   * line: `n_world` divided by 5e7 so that it fits on the same y axis.
# No `bins`/`binwidth` is supplied, so stat_bin() falls back to its default of
# 30 bins; over 1880-2017 each bar therefore covers several years, not one.
# NOTE(review): `n_world` is not among the columns printed by str(babynames);
# presumably it is added to the data elsewhere -- confirm.
ggplot(babynames, aes(x = year)) +
  # after_stat(count) is the current spelling of the deprecated ..count..
  geom_histogram(
    aes(y = after_stat(count) / 1e3),
    fill = "#002546", alpha = 0.3, colour = "#002546"
  ) +
  # inherit.aes = FALSE: this layer uses only the mapping given here
  geom_line(aes(x = year, y = n_world / 5e7), inherit.aes = FALSE) +
  labs(
    title = "Total number of names per year",
    caption = "source: tidytuesday - Baby names",
    x = "Year",
    y = "Total number (in thousands)"
  ) +
  theme(
    axis.line.x = element_line(color = "black"),
    axis.text = element_text(size = 18, family = "AUPassata_Rg"),
    plot.title = element_text(size = 25, hjust = 0.5, family = "AUPassata_Bold"),
    text = element_text(size = 20, family = "AUPassata_Rg")
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## # A tibble: 53 × 2
## # Groups: name [53]
## name n
## <chr> <int>
## 1 Mary 88
## 2 Helen 30
## 3 Anna 25
## 4 Emma 21
## 5 Dorothy 20
## 6 Jessica 19
## 7 Linda 17
## 8 Jennifer 17
## 9 Barbara 15
## 10 Emily 15
## 11 Ashley 14
## 12 Patricia 13
## 13 Lisa 12
## 14 Betty 10
## 15 Margaret 9
## 16 Amanda 9
## 17 Susan 8
## 18 Michelle 7
## 19 Madison 7
## 20 Isabella 6
## 21 Olivia 6
## 22 Sophia 6
## 23 Elizabeth 5
## 24 Amy 5
## 25 Hannah 5
## 26 Kimberly 4
## 27 Melissa 4
## 28 Deborah 3
## 29 Brittany 3
## 30 Shirley 2
## 31 Ava 2
## 32 Ruth 1
## 33 Debra 1
## 34 Karen 1
## 35 Heather 1
## 36 Sarah 1
## 37 Samantha 1
## 38 Alexis 1
## 39 John 80
## 40 James 80
## 41 Michael 58
## 42 William 49
## 43 Robert 36
## 44 Christopher 24
## 45 Matthew 21
## 46 Jacob 18
## 47 David 16
## 48 Jason 8
## 49 Joshua 5
## 50 Ethan 5
## 51 Noah 5
## 52 Liam 5
## 53 Mason 4
In 2003, Emma was used in the TV show “Friends” as the name of Rachel and Ross’s baby.
# Keep only the rows of `pop_babynames` for the name "Emma" and show the
# year, name and proportion columns.
pop_babynames %>%
  filter(name == "Emma") %>%
  select(year, name, prop)
## # A tibble: 21 × 3
## # Groups: year [21]
## year name prop
## <dbl> <chr> <dbl>
## 1 1881 Emma 0.0206
## 2 1882 Emma 0.0199
## 3 1883 Emma 0.0197
## 4 1884 Emma 0.0188
## 5 1885 Emma 0.0192
## 6 1886 Emma 0.0180
## 7 2003 Emma 0.0113
## 8 2004 Emma 0.0107
## 9 2005 Emma 0.0100
## 10 2006 Emma 0.00915
## # … with 11 more rows
# Scatter plot of the yearly proportion (`prop`) recorded for the name "Emma",
# with a red vertical marker at 2003.
# The filter is on `name` only, so rows for every sex recorded under that name
# are drawn.
babynames %>%
  filter(name == "Emma") %>%
  ggplot(aes(x = year, y = prop)) +
  geom_point() +
  # Pass the fixed position as a parameter: a constant mapped inside aes() is
  # recycled over every row of the data, so the same line is drawn repeatedly.
  geom_vline(xintercept = 2003, colour = "red") +
  labs(
    title = "Emma",
    caption = "source: tidytuesday - Baby names",
    x = "Year",
    y = "Popularity"
  ) +
  theme(
    axis.line.x = element_line(color = "black"),
    axis.text = element_text(size = 18, family = "AUPassata_Rg"),
    plot.title = element_text(size = 25, hjust = 0.5, family = "AUPassata_Bold"),
    text = element_text(size = 20, family = "AUPassata_Rg")
  )
In 1964, the film “The Americanization of Emily” was shown in theatres.
# Line plot of the yearly proportion (`prop`) recorded for the name "Emily",
# with a red vertical marker at 1964.
babynames %>%
  filter(name == "Emily") %>%
  # The data holds separate rows per sex and the filter is on `name` only.
  # Grouping by sex draws one line per sex; without it geom_line() would join
  # the values of both sexes into a single zigzag path in any year that has
  # rows for more than one sex.
  ggplot(aes(x = year, y = prop, group = sex)) +
  geom_line() +
  # Pass the fixed position as a parameter: a constant mapped inside aes() is
  # recycled over every row of the data, so the same line is drawn repeatedly.
  geom_vline(xintercept = 1964, colour = "red") +
  labs(
    title = "Emily",
    caption = "source: tidytuesday - Baby names",
    x = "Year",
    y = "Popularity"
  ) +
  theme(
    axis.line.x = element_line(color = "black"),
    axis.text = element_text(size = 18, family = "AUPassata_Rg"),
    plot.title = element_text(size = 25, hjust = 0.5, family = "AUPassata_Bold"),
    text = element_text(size = 20, family = "AUPassata_Rg")
  )