R Markdown

Wealth Inequality in the United States

Houcheng Li, Nan Xu, Aisha Shigna Nadukkandy

Packages used for the analysis:

Dataset 1:

# Mean vs. median income over time for the "All Races" category.
income_distribution <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-02-09/income_distribution.csv"
)

# Rows for the combined "All Races" category; `extract` is reused further
# down for the income-bracket plot.
extract <- income_distribution[income_distribution$race == "All Races", ]

# Select columns by name instead of by position (previously 1, 4 and 6) so a
# change in column order cannot silently plot the wrong variables.
data1 <- extract[, c("year", "income_median", "income_mean")]

# Keep only the first row for each year.
data1 <- data1[!duplicated(data1$year), ]

# Treat year as a discrete axis.
data1$year <- as.factor(data1$year)

# Long format: one row per (year, variable) with the income in `value`.
# NOTE(review): melt() is not namespaced; it must come from a package attached
# elsewhere in the document (presumably reshape2 -- confirm).
data2 <- melt(data1, id.vars = "year")

# `year` is already a factor, so no extra factor() wrapper is needed in aes().
# Line thickness is set with `linewidth`; `size` is deprecated for lines in
# current ggplot2 and the other plots in this file already use `linewidth`.
ggplot(data2, aes(x = year, y = value, colour = variable, group = variable)) +
  geom_line(linewidth = 2) +
  theme_minimal() +
  scale_color_manual(values = c("income_median" = "blue", "income_mean" = "red")) +
  labs(x = "Year", y = "Income", title = "Change in income over the years for all races combined") +
  theme(
    plot.title = element_text(colour = "black", family = "Georgia", size = 16, hjust = 0.5),
    plot.subtitle = element_text(colour = "black", family = "Georgia", size = 12),
    axis.title = element_text(colour = "black", family = "Georgia", size = 16),
    # Small, rotated tick labels so the many year labels do not overlap.
    axis.text = element_text(colour = "black", family = "Georgia", size = 7, angle = 45),
    legend.text = element_text(colour = "black", family = "Georgia", size = 10),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey60"),
    panel.background = element_rect(fill = "grey60", colour = "grey60")
  )
# Share of each income bracket per year for the "All Races" rows.
# Columns are selected by name instead of by position (previously 1, 8 and 9).
data3 <- extract[, c("year", "income_bracket", "income_distribution")]

# Fix the bracket order to the order of first appearance in the data so the
# stacked segments and the legend do not fall back to alphabetical order.
data3$income_bracket <- factor(data3$income_bracket, levels = unique(data3$income_bracket))

# position = "fill" rescales every bar to a total of 1, so the bars show
# proportions. The earlier duplicate labs() call was removed: it was fully
# overridden by the one below. The title typo ("CIncome") is corrected.
ggplot(data3, aes(x = year, y = income_distribution, fill = income_bracket)) +
  geom_col(position = "fill", width = 0.6) +
  # NOTE(review): `colour` is not defined in the visible part of this file; it
  # must be a vector of fill colours (one per bracket) created elsewhere --
  # confirm it exists before this chunk runs.
  scale_fill_manual(values = colour) +
  labs(x = "Year", y = "Income Distribution", title = "Income distribution between different salary groups in US") +
  theme_minimal() +
  theme(
    plot.title = element_text(colour = "black", family = "Georgia", size = 16, hjust = 0.5),
    plot.subtitle = element_text(colour = "black", family = "Georgia", size = 12),
    axis.title = element_text(colour = "black", family = "Georgia", size = 16),
    axis.text = element_text(colour = "black", family = "Georgia", size = 7, angle = 45),
    legend.text = element_text(colour = "black", family = "Georgia", size = 6),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey60"),
    panel.background = element_rect(fill = "grey60", colour = "grey60")
  )

# Mean income per race and year, excluding the "... or in Combination"
# categories listed below.
race_exclusion <- c("Asian Alone or in Combination", "Black Alone or in Combination")

# Average income_mean within each (race, year) pair. `.groups = "drop"`
# returns an ungrouped result and avoids the regrouping message from
# summarise().
filtered_data_race <- income_distribution %>%
  filter(!race %in% race_exclusion) %>%
  group_by(race, year) %>%
  summarise(MeanIncome = mean(income_mean, na.rm = TRUE), .groups = "drop")

# One colour per remaining race category; scale_color_manual() needs at least
# as many values as there are levels of `race`.
custom_colors <- c("red", "blue", "orange", "green", "violet", "yellow")

# linewidth is a fixed setting, so it goes outside aes(). Inside aes() the
# constant was treated as a data mapping: the width was rescaled and an extra
# "1.5" key appeared in the legend.
ggplot(filtered_data_race, aes(x = year, y = MeanIncome, color = race)) +
  geom_line(linewidth = 1.5, na.rm = TRUE) +
  geom_point() +
  labs(x = "Year", y = "Mean Income", title = "Trends in Mean income over the years (1967 - 2019)") +
  scale_color_manual(values = custom_colors) +
  theme_minimal() +
  theme(
    plot.title = element_text(colour = "black", family = "Georgia", size = 16, hjust = 0.5),
    plot.subtitle = element_text(colour = "black", family = "Georgia", size = 12),
    axis.title = element_text(colour = "black", family = "Georgia", size = 16),
    axis.text = element_text(colour = "black", family = "Georgia", size = 14),
    legend.text = element_text(colour = "black", family = "Georgia", size = 10),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey60"),
    panel.background = element_rect(fill = "grey60", colour = "grey60")
  )
Income share among races in the US.

# Load the income-aggregate dataset from the TidyTuesday repository.
income_aggregate <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-02-09/income_aggregate.csv')
# Check race count (no code for this step is present here).
# Pre-processing: drop rows containing NA, then keep rows from the year 2000
# onwards and exclude the "Top 5%" income_quintile rows.
df1 <- income_aggregate %>% na.omit() %>% 
  subset(year>=2000 & income_quintile!="Top 5%")
# Reorder income_quintile.
# NOTE(review): the code that reorders income_quintile and starts the plot
# appears to be missing. The two lines below are a dangling continuation with
# no ggplot(...) call in front of them -- restore it before running.
# NOTE(review): labs() takes axis labels as `x =` / `y =`; the `xlab =` and
# `ylab =` arguments below are probably not applied to the axes -- confirm.
  labs(title = "The income share of different races in 21th century ", xlab="Year",ylab="Income share",color="Classification")+
  scale_x_continuous(breaks = c(2000,2010,2019))+theme_bw()

Do racial disparities cause wealth disparities in the US?

To analyse this, we used two datasets: one provides family wealth by race, year, and measure, normalized to 2016, and the other provides the amount of money people own after their retirement.

# Load the two datasets used in this section. `retirement` is used by the
# retirement plot further down.
race_wealth <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-02-09/race_wealth.csv"
)
retirement <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-02-09/retirement.csv"
)

# Average wealth_family within each (race, year) pair. `.groups = "drop"`
# returns an ungrouped result and avoids the regrouping message from
# summarise().
wealthperrace <- race_wealth %>%
  group_by(race, year) %>%
  summarise(wealth = mean(wealth_family, na.rm = TRUE), .groups = "drop")

# linewidth is a fixed setting, so it goes outside aes(). Inside aes() the
# constant was treated as a data mapping and added a spurious "1" key to the
# legend.
# NOTE(review): the manual colour scale names only Black, Hispanic and White;
# confirm the data has no other race values, which would be drawn in the
# scale's NA colour.
wealth_plot <- ggplot(wealthperrace, aes(x = year, y = wealth, color = race)) +
  geom_line(linewidth = 1, na.rm = TRUE) +
  geom_point(na.rm = TRUE) +
  labs(x = "Year", y = "Distribution of wealth per race", title = "Distribution of average family wealth by race (1963-2016)") +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +
  scale_color_manual(values = c("Black" = "blue", "Hispanic" = "green", "White" = "red")) +
  theme_minimal() +
  theme(
    plot.title = element_text(colour = "black", family = "Georgia", size = 16, hjust = 0.5),
    plot.subtitle = element_text(colour = "black", family = "Georgia", size = 18),
    axis.title = element_text(colour = "black", family = "Georgia", size = 14),
    axis.text = element_text(colour = "black", family = "Georgia", size = 14),
    legend.text = element_text(colour = "black", family = "Georgia", size = 16),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey60"),
    panel.background = element_rect(fill = "grey60", colour = "grey60")
  )

# Average of the `retirement` column within each (race, year) pair.
# `.groups = "drop"` returns an ungrouped result and avoids the regrouping
# message from summarise().
wealth_retirement <- retirement %>%
  group_by(race, year) %>%
  summarise(wealth_ret = mean(retirement, na.rm = TRUE), .groups = "drop")

# linewidth is a fixed setting, so it goes outside aes(). Inside aes() the
# constant was treated as a data mapping and added a spurious "1" key to the
# legend.
ret_plot <- ggplot(wealth_retirement, aes(x = year, y = wealth_ret, color = race)) +
  geom_line(linewidth = 1, na.rm = TRUE) +
  geom_point(na.rm = TRUE) +
  labs(x = "Year", y = "Retirement wealth", title = "Distribution of retirement wealth by race (1963-2016)") +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +
  scale_color_manual(values = c("Black" = "blue", "Hispanic" = "green", "White" = "red")) +
  theme_minimal() +
  theme(
    plot.title = element_text(colour = "black", family = "Georgia", size = 16, hjust = 0.5),
    plot.subtitle = element_text(colour = "black", family = "Georgia", size = 18),
    axis.title = element_text(colour = "black", family = "Georgia", size = 14),
    axis.text = element_text(colour = "black", family = "Georgia", size = 14),
    legend.text = element_text(colour = "black", family = "Georgia", size = 16),
    legend.position = "top",
    legend.title = element_blank(),
    plot.background = element_rect(fill = "grey60"),
    panel.background = element_rect(fill = "grey60", colour = "grey60")
  )

# Place the family-wealth and retirement plots side by side.
combined_plots <- grid.arrange(wealth_plot, ret_plot, ncol = 2)

