CEO Departures

Plot No. 1

By Patrizia Maier

# get packages 
library(tidyverse)
library(gender)
library(genderdata)
library(extrafont)
library(ggtext)
library(waffle)
# font_import() # only once 
loadfonts(device = "win", quiet = TRUE) # every time 
# windowsFonts() # to check available options 

# get tidy tuesday data 
tuesdata <- tidytuesdayR::tt_load('2021-04-27')
departures <- tuesdata$departures

# text mining gender information (only male/female)
# extract names
names <- departures$exec_fullname %>% 
  str_split_fixed(pattern=" ", n=5) %>% # split names into 5 column matrix
  as_tibble() %>% # convert to tibble
  mutate(first_name=case_when(
    str_detect(V1, "\\.") ~ V2, # if abbreviated first name, get second name
    TRUE ~ V1 # else first name))

# perform gender guess prediction 
gender_guess <- names$first_name %>% 
  gender(years = c(1920, 1985), method = "ssa") %>% # apply gender estimation
  select(!starts_with("year")) %>% # select variables 
  distinct() %>%  # remove duplicates 
  rename("gender_name"="gender") # rename variable for clarity 

# Caution: Some names are ambiguous and can be female or male (see proportion). 
# In the data set, quite a few CEO's with highly likely "female" names are actually "male". 
# This is implied by the information in 'notes' (e.g., "Mr. ", "he", ...).
# Besides, there are quite a few NA's in case of abbreviated first names (e.g., "L.").
# --> Therefore we need more text mining based on 'notes'. 

# add information from 'notes'
indicator_male <- c("\\Whe\\W", "\\Whe's\\W","\\Whis\\W", "\\WMr\\W")
indicator_female <- c("\\Wshe\\W", "\\Wshe's\\W", "\\Wher\\W", "\\WMrs\\W", "\\WMs\\W", "\\Wlady\\W")

departures <- departures %>% 
  bind_cols(names) %>% 
  left_join(gender_guess, by=c("first_name"="name")) %>%  
  mutate(gender_notes_fem=str_detect(notes, regex(paste(indicator_female, collapse = "|"), ignore_case = T)),
         gender_notes_male=str_detect(notes, regex(paste(indicator_male, collapse = "|"), ignore_case = T))) %>% 
  mutate(final_gender_guess=case_when(
    proportion_male > 0.99 ~ "male",
    proportion_female > 0.99 ~ "female",
    gender_notes_male & !gender_notes_fem ~ "male",
    gender_notes_fem & !gender_notes_male ~ "female",
    !gender_notes_male & !gender_notes_fem ~ gender_name)) %>% 
  mutate(final_gender_guess=as.factor(final_gender_guess))

# summary(departures$final_gender_guess)
# female   male   NA's 
#   299   8850    274 

# prepare data for plotting 
data_gender <- departures %>% 
  group_by(fyear) %>% 
  count(final_gender_guess) %>% 
  drop_na(final_gender_guess) %>% 
  complete(final_gender_guess, fill=list(n=0)) %>% 
  filter(fyear>=1992 & fyear <2020)

# create waffle plot
p1 <- ggplot(data_gender, aes(fill = final_gender_guess, values = n)) +
  geom_waffle(color = "#faf7f1", size = .5, n_rows = 5, flip = TRUE) +
  facet_wrap(~fyear,  nrow = 1, strip.position = "bottom") +
  scale_fill_manual(values = c("#ef8f10", "#9f969b")) + 
  scale_x_discrete() + 
  scale_y_continuous(labels = function(x) x * 5, # make this multiplier the same as n_rows
                     expand = c(0,0)) +
  coord_equal(clip="off") + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "#faf7f1", linetype = "blank"),
        plot.title = element_markdown(size=30), 
        plot.title.position = "plot",
        plot.subtitle = element_markdown(size=20),
        plot.caption = element_markdown(size=9),
        plot.caption.position = "plot",
        plot.margin = unit(c(1,1,0.5,1), "cm"),
        panel.grid = element_blank(),
        panel.spacing.x = unit(0.75, "lines"),
        axis.ticks.y = element_line(),
        legend.position = "none", 
        legend.title = element_blank(),
        legend.direction = "horizontal",
        text=element_text(family = "Bahnschrift", size=11),
        strip.switch.pad.wrap = unit(0, "cm"),
        strip.text = element_text(angle = 45)) +
  labs(title="\nCEO departures in major US firms", 
       subtitle="highlighting **<span style = 'color:#ef8f10;'>female</span>** CEO's",
       caption = "Dataviz: Patrizia Maier | Data: Gentry et al. 2021 | Firms from S&P 1500 index") 

Bonus Plot: