Targets pipeline

targets pipeline

We use the magical {targets} package to run our analysis and keep track of all dependencies automatically.

To build our entire project, run targets::tar_make() at the R console.

Here’s our complete pipeline:

Actual code

All the data processing is handled with dataset-specific functions that live in R/funs_data-cleaning.R, which targets then runs as needed. For the sake of transparency, here’s that code:

`R/funs_data-cleaning.R`

Code

library(readxl)
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(clock))
library(countrycode)
library(jsonlite)
suppressPackageStartupMessages(library(sf))

clean_iccpr_who <- function(path, subregions_path) {
  who_regions <- tribble(
    ~who_region, ~who_region_long,
    "AFRO", "Regional Office for Africa",
    "AMRO", "Regional Office for the Americas",
    "SEARO", "Regional Office for South-East Asia",
    "EURO", "Regional Office for Europe",
    "EMRO", "Regional Office for the Eastern Mediterranean",
    "WPRO", "Regional Office for the Western Pacific"
  )
  
  who_subregions_manual <- tribble(
    ~member_states, ~region_abbr, ~stratum, ~subregion_imputed,
    "American Samoa", "WPR", "B", TRUE,
    "Anguilla", "AMR", "B", TRUE,
    "Aruba", "AMR", "B", TRUE,
    "Bermuda", "AMR", "B", TRUE,
    "British Virgin Islands", "AMR", "B", TRUE,
    "Cayman Islands", "AMR", "B", TRUE,
    "Curaçao", "AMR", "B", TRUE,
    "French Guiana", "AMR", "B", TRUE,
    "French Polynesia", "WPR", "B", TRUE,
    "Kosovo", "EUR", "B", TRUE,
    "Liechtenstein", "EUR", "A", TRUE,
    "Palestinian Territories", "EMR", "B", TRUE,
    "St. Barthélemy", "AMR", "B", TRUE,
    "St. Helena", "AFR", "E", TRUE,
    "St. Martin (French part)", "AMR", "B", TRUE,
    "Sint Maarten", "AMR", "B", TRUE,
    "Turks & Caicos Islands", "AMR", "B", TRUE
  )
  
  who_subregions <- read_excel(subregions_path) %>% 
    set_names(c("subregions", "member_states")) %>% 
    separate(subregions, into = c("region_abbr", "stratum"), sep = " ") %>% 
    mutate(member_states = str_split(member_states, "; ")) %>% 
    unnest(member_states) %>% 
    bind_rows(who_subregions_manual) %>% 
    mutate(
      who_region = paste0(region_abbr, "O"),
      who_subregion = paste0(region_abbr, stratum)
    ) %>% 
    replace_na(list(subregion_imputed = FALSE)) %>% 
    mutate(
      country_name = countrycode(
        member_states, origin = "country.name", destination = "country.name",
        custom_match = c("Turkey" = "Türkiye")),
      iso3 = countrycode(
        country_name, origin = "country.name", destination = "iso3c",
        custom_match = c("Kosovo" = "XKX"))
    ) %>% 
    select(iso3, who_subregion, subregion_imputed)
  
  x <- read_excel(path) %>% 
    janitor::clean_names() %>% 
    # Make this a date instead of PosixCT
    mutate(date_reported = as.Date(date_reported)) %>% 
    # All NAs here are actually 0s
    replace_na(list(iccpr_derogation_filed = 0,
                    derogation_start = 0,
                    derogation_ineffect = 0,
                    derogation_end = 0)) %>% 
    # Country names and codes fun times
    mutate(
      country_name = countrycode(
        country_code, origin = "iso2c", destination = "country.name",
        custom_match = c("XK" = "Kosovo", "TR" = "Türkiye")),
      iso3 = countrycode(
        country_code, origin = "iso2c", destination = "iso3c",
        custom_match = c("XK" = "XKX")
      )
    ) %>% 
    left_join(who_regions, by = "who_region") %>% 
    left_join(who_subregions, by = "iso3") %>% 
    # Final column order
    select(-c(country_code, country, cow_code)) %>% 
    select(country_name, iso3, who_region, who_region_long,
           who_subregion, subregion_imputed,
           day = date_reported, everything())
  
  return(x)
}

clean_iccpr_action <- function(path) {
  x <- read_excel(path) %>% 
    janitor::clean_names() %>% 
    rename(prior_iccpr_other_action = other_prior_iccpr_post_commitment_treaty_actions,
           country_name = country) %>% 
    mutate(across(starts_with("prior_"), ~case_when(
      . == 1 ~ TRUE,
      is.na(.) ~ FALSE
    )))
  
  return(x)
}

clean_oxford <- function(path) {
  x <- tibble(
    # Get a list of all the sheets in the Excel file
    index_name = excel_sheets(path)
  ) %>% 
    # Read each sheet
    mutate(data = map(index_name, ~read_excel(path, sheet = .x))) %>% 
    # Standardize the index name based on the sheet name
    mutate(index_name = janitor::make_clean_names(index_name)) %>% 
    # Make each data frame cell in the list column long and a little cleaner
    mutate(clean = map(data, ~{
      .x %>% 
        pivot_longer(cols = -c(country_code, country_name),
                     names_to = "day", values_to = "value") %>% 
        mutate(day = dmy(day))
    })) %>% 
    # Get rid of the original wide data frame and unnest the long clean data
    select(-data) %>% 
    unnest(clean) %>% 
    # Make data wide so that there's a column for each index and row for each
    # country-day
    pivot_wider(names_from = "index_name", values_from = "value") %>% 
    # Country names and codes fun times
    mutate(iso3 = recode(country_code, "RKS" = "XKX")) %>% 
    mutate(country_name = countrycode(
      iso3, origin = "iso3c", destination = "country.name",
      custom_match = c("XKX" = "Kosovo", "TUR" = "Türkiye")
    )) %>% 
    # Get rid of countries with all missing data
    group_by(country_name) %>% 
    filter(!all(is.na(stringency_index))) %>% 
    ungroup() %>% 
    # Shorten these long column names
    rename(
      c3_cancel_events = c3_cancel_public_events, 
      c4_gatherings = c4_restrictions_on_gaterings,
      c5_public_transport = c5_close_public_transport, 
      c6_stay_at_home = c6_stay_at_home_requirements,
      c7_internal_movement = c7_movement_restrictions_intern, 
      c8_intl_travel = c8_international_travel_control
    ) %>% 
    # Make binary versions of these columns
    mutate(across(c(c3_cancel_events, c4_gatherings,
                    c5_public_transport, c6_stay_at_home,
                    c7_internal_movement, c8_intl_travel,
                    e1_income_support, e2_debt_relief),
                  list(bin = ~ ifelse(. > 0, 1, 0)))) %>% 
    # Create indicators for whether policies were added, removed, or never changed
    group_by(country_name) %>% 
    mutate(across(c(c3_cancel_events, c4_gatherings,
                    c5_public_transport, c6_stay_at_home,
                    c7_internal_movement, c8_intl_travel,
                    e1_income_support, e2_debt_relief),
                  list(added = ~any(c(NA, diff(.)) >= 1, na.rm = TRUE),
                       removed = ~any(c(NA, diff(.)) <= -1, na.rm = TRUE),
                       never = ~all(c(NA, diff(.)) == 0, na.rm = TRUE)))) %>% 
    ungroup() %>% 
    # Final column order
    select(-country_code) %>% 
    select(country_name, iso3, day, everything())
  
  return(x)
}

clean_pandem <- function(path) {
  pandem_raw <- read_excel(path)
  
  pandem_levels <- c("None" = "0", "Minor" = "1", "Moderate" = "2", "Major" = "3")
  
  pandem_clean <- pandem_raw %>% 
    mutate(year_quarter = paste0(year, "-", quarter)) %>% 
    mutate(iso3 = countrycode(country_name, 
                              origin = "country.name", 
                              destination = "iso3c"),
           country_name = countrycode(iso3, origin = "iso3c", 
                                      destination = "country.name",
                                      custom_match = c("TUR" = "Türkiye"))) %>% 
    select(country_name, iso3, year, year_quarter, 
           pandem, panback, 
           pandem_discrim = type1, 
           pandem_ndrights = type2,
           pandem_abusive = type3,
           pandem_nolimit = type4,
           pandem_media = type7) %>% 
    # Make these 0-3 columns factors
    mutate(across(starts_with("pandem_"), ~factor(., levels = pandem_levels, ordered = TRUE))) %>% 
    # Add labels
    mutate(across(starts_with("pandem_"), ~fct_recode(., !!!pandem_levels))) %>% 
    # Drop unused levels
    mutate(across(starts_with("pandem_"), ~fct_drop(.)))
  
  return(pandem_clean)
}

clean_vdem <- function(path) {
  vdem_raw <- read_rds(path) %>% as_tibble()
  
  vdem_clean <- vdem_raw %>% 
    filter(year >= 2020) %>% 
    mutate(country_name = countrycode(
      country_text_id, origin = "iso3c", destination = "country.name",
      custom_match = c("XKX" = "Kosovo", "ZZB" = "Zanzibar", 
                       "PSG" = "Palestine (Gaza)", "SML" = "Somaliland", 
                       "TUR" = "Türkiye")
    )) %>% 
    select(country_name, iso3 = country_text_id, year,
           
           # Civil society stuff
           v2csreprss,  # CSO repression
           v2xcs_ccsi,  # Core civil society index (entry/exit, repression, participatory env)
           
           # Human rights and politics
           # Political corruption index (less to more, 0-1) (public sector +
           # executive + legislative + judicial corruption)
           v2x_corr,
           v2x_rule,  # Rule of law index
           
           # Rights indexes
           v2x_civlib,  # Civil liberties index
           v2x_clphy,  # Physical violence index
           v2x_clpriv,  # Private civil liberties index
           v2x_clpol,  # Political civil liberties index
           
           # Democracy
           v2x_polyarchy, v2x_libdem, v2x_regime_amb
    )
  
  return(vdem_clean)
}

create_daily_skeleton <- function(iccpr_who, oxford, pandem, vdem) {
  all_countries <- list(unique(iccpr_who$iso3), 
                        unique(oxford$iso3), 
                        unique(pandem$iso3),
                        unique(vdem$iso3))
  
  countries_in_all_data <- reduce(all_countries, intersect)
  
  # first_day <- min(oxford$day)
  first_day <- ymd("2020-03-11")
  last_day <- max(oxford$day)
  
  daily_skeleton <- expand_grid(
    iso3 = countries_in_all_data,
    day = seq(first_day, last_day, by = "1 day")
  ) %>% 
    mutate(year = year(day),
           year_quarter = quarter(day, type = "year.quarter"),
           day_num = as.numeric(day) - as.numeric(ymd("2020-03-10")),
           year_week = calendar_narrow(as_iso_year_week_day(day), "week"),
           year_week_day = as_year_month_day(calendar_narrow(set_day(year_week, 1), "day")),
           year_quarter = calendar_narrow(as_year_quarter_day(day), "quarter"),
           year_quarter_day = as_year_month_day(calendar_narrow(set_day(year_quarter, 1), "day"))) %>% 
    # Force the year_week column to be text since it's a weird {clock} class
    mutate(year_week = as.character(year_week),
           year_week_day = as.character(year_week_day),
           year_quarter = as.character(year_quarter),
           year_quarter_day = as.character(year_quarter_day)) %>% 
    # Pandem starts Q2 2020 on March 11 instead of April 1
    mutate(year_quarter = ifelse(year_quarter == "2020-Q1", "2020-Q2", year_quarter)) %>% 
    mutate(country_name = countrycode(
      iso3, origin = "iso3c", destination = "country.name",
      custom_match = c("XKX" = "Kosovo", "TUR" = "Türkiye")
    )) %>% 
    select(country_name, iso3, day, day_num, year, 
           year_quarter, year_quarter_day, year_week, year_week_day)
  
  return(daily_skeleton)
}

make_final_data <- function(skeleton, iccpr_who, iccpr_action, oxford, pandem, vdem) {
  daily_final <- skeleton %>% 
    left_join(select(iccpr_who, -country_name), by = c("iso3", "day")) %>% 
    left_join(select(iccpr_action, -country_name), by = c("iso3")) %>% 
    left_join(select(oxford, -country_name), by = c("iso3", "day")) %>% 
    left_join(select(pandem, -c(country_name, year)), by = c("iso3", "year_quarter")) %>% 
    left_join(select(vdem, -country_name), by = c("iso3", "year"))
  
  return(daily_final)
}

make_weekly_data <- function(daily_final) {
  weekly_final <- daily_final %>% 
    group_by(year_week, year_week_day, country_name, iso3, who_region, who_region_long, 
             who_subregion, prior_iccpr_derogations, prior_iccpr_other_action) %>% 
    summarize(across(c(new_cases, new_deaths), ~sum(., na.rm = TRUE)),
              across(c(iccpr_derogation_filed, derogation_start, 
                       derogation_ineffect, derogation_end), ~max(., na.rm = TRUE)),
              across(matches("[ce]\\d_"), ~max(., na.rm = TRUE)),
              across(c(pandem, panback, starts_with("pandem_"), starts_with("v2")), ~max(., na.rm = TRUE))) %>% 
    group_by(country_name) %>% 
    mutate(cumulative_cases = cumsum(new_cases),
           cumulative_deaths = cumsum(new_deaths)) %>% 
    mutate(year_week_num = 1:n(), .after = "year_week") %>% 
    ungroup() %>% 
    arrange(country_name)
  
  return(weekly_final)
}

make_quarterly_data <- function(daily_final) {
  quarterly_final <- daily_final %>% 
    group_by(year_quarter, year_quarter_day, country_name, iso3, who_region, who_region_long, 
      who_subregion, prior_iccpr_derogations, prior_iccpr_other_action) %>% 
    summarize(across(c(new_cases, new_deaths), ~sum(., na.rm = TRUE)),
      across(c(iccpr_derogation_filed, derogation_start, 
        derogation_ineffect, derogation_end), ~max(., na.rm = TRUE)),
      across(matches("[ce]\\d_"), ~max(., na.rm = TRUE)),
      across(c(pandem, panback, starts_with("pandem_"), starts_with("v2")), ~max(., na.rm = TRUE))) %>% 
    group_by(country_name) %>% 
    mutate(cumulative_cases = cumsum(new_cases),
      cumulative_deaths = cumsum(new_deaths)) %>% 
    mutate(year_quarter_num = 1:n(), .after = "year_quarter") %>%
    ungroup() %>% 
    arrange(country_name)
  
  return(quarterly_final)
}

make_year_week_lookup <- function(weekly_final) {
  year_week_lookup <- weekly_final %>% 
    distinct(year_week, year_week_num, year_week_day) %>% 
    mutate(year_week_day = ymd(year_week_day))
  
  return(year_week_lookup)
}

make_year_quarter_lookup <- function(quarterly_final) {
  year_quarter_lookup <- quarterly_final %>% 
    distinct(year_quarter, year_quarter_num, year_quarter_day) %>% 
    mutate(year_quarter_day = ymd(year_quarter_day))
  
  return(year_quarter_lookup)
}

load_world_map <- function(path) {
  world_map <- read_sf(path) %>%
    filter(ISO_A3 != "ATA")
  
  return(world_map)
}

make_derogation_count <- function(data) {
  new_derogations <- data %>% 
    group_by(iso3) %>% 
    summarize(derogations = sum(iccpr_derogation_filed))
  
  return(new_derogations)
}

make_derogation_map_data <- function(derogation_count, map) {
  map_with_derogations <- map %>%
    # Fix some Natural Earth ISO weirdness
    mutate(ISO_A3 = ifelse(ISO_A3 == "-99", as.character(ISO_A3_EH), as.character(ISO_A3))) %>%
    mutate(ISO_A3 = case_when(
      .$ISO_A3 == "GRL" ~ "DNK",
      .$NAME == "Norway" ~ "NOR",
      .$NAME == "Kosovo" ~ "XKK",
      TRUE ~ ISO_A3
    )) %>%
    left_join(derogation_count, by = join_by(ISO_A3 == iso3)) %>% 
    mutate(derogations_1plus = ifelse(derogations == 0, NA, derogations))
  
  return(map_with_derogations)
}

# When using a file-based target, {targets} requires that the function that
# saves the file returns a path to the file. write_csv() and write_dta() both
# invisibly return the data frame being written, and saveRDS() returns NULL, so
# we need some wrapper functions to save the files and return the paths.
save_csv <- function(df, path) {
  readr::write_csv(df, path)
  return(path)
}

save_r <- function(df, path) {
  saveRDS(df, path)
  return(path)
}

save_dta <- function(df, path) {
  haven::write_dta(df, path)
  return(path)
}

`R/funs_graphics.R`

Code

clrs <- MetBrewer::met.brewer("Tiepolo")

set_annotation_fonts <- function() {
  ggplot2::update_geom_defaults("label", list(family = "Noto Sans", face = "plain"))
  ggplot2::update_geom_defaults("text", list(family = "Noto Sans", face = "plain"))
}

theme_pandem <- function(base_size = 11, base_family = "Noto Sans", prior = FALSE) {
  ret <- theme_bw(base_size, base_family) +
    theme(panel.background = element_rect(fill = "#ffffff", colour = NA),
          title = element_text(size = rel(1), family = "Noto Sans", face = "bold"),
          plot.subtitle = element_text(size = rel(0.8),
                                       family = "Noto Sans", face = "plain"),
          plot.caption = element_text(margin = margin(t = 10), size = rel(0.6),
                                      family = "Noto Sans", face = "plain"),
          panel.border = element_rect(color = "grey50", fill = NA, linewidth = 0.15),
          panel.spacing = unit(1, "lines"),
          panel.grid.minor = element_blank(),
          panel.grid.major = element_line(linewidth = 0.25, colour = "grey90"),
          axis.line = element_blank(),
          axis.ticks = element_blank(),
          axis.title = element_text(size = rel(0.8),
                                    family = "Noto Sans", face = "bold"),
          axis.title.x = element_text(hjust = 0, margin = margin(t = 10)),
          axis.title.y = element_text(hjust = 1, margin = margin(r = 10)),
          legend.position = "bottom",
          legend.title = element_text(size = rel(0.7), vjust = 0.5,
                                      family = "Noto Sans", face = "plain"),
          legend.key.size = unit(0.7, "line"),
          legend.key = element_blank(),
          legend.spacing = unit(0.1, "lines"),
          legend.justification = "left",
          legend.margin = margin(t = -5, b = 0, l = 0, r = 0),
          strip.text = element_text(size = rel(0.9), hjust = 0,
                                    family = "Noto Sans", face = "bold"),
          strip.background = element_rect(fill = "white", colour = NA))
  
  if (prior) {
    ret <- ret +
      theme(panel.grid.major = element_blank(),
            axis.title.y = element_blank(),
            axis.text.y = element_blank(),
            panel.border = element_blank())
  } else {
    ret
  }
}

`R/funs_models.R`

Code

# Running modelsummary() on Bayesian models takes a while because of all the
# calculations involved in creating the GOF statistics. With modelsummary 0.7+,
# though it's now possible to build the base model with modelsummary(..., output
# = "modelsummary_list"), save that as an intermediate object, and then feed it
# through modelsummary() again with whatever other output you want. The
# modelsummary_list-based object thus acts like an output-agnostic ur-model.

build_modelsummary <- function(model_df) {
  msl <- model_df %>% 
    pull(model, name = nice) %>% 
    modelsummary::modelsummary(output = "modelsummary_list",
                               statistic = "[{conf.low}, {conf.high}]",
                               ci_method = "hdi",
                               metrics = c("R2"))
  return(msl)
}

# This is how to get other stats like ELPD (using metrics = "LOOIC" is required
# for that; metrics = "R2" provides nobs). I would do this for all the summary
# tables, but for whatever reason, it takes forever to calculate ELPD/LOO stuff
# on ordered logit models, so we just show N instead
#
# gm <- tribble(
#   ~raw,        ~clean,      ~fmt, ~omit,
#   "nobs",      "N",         0,    FALSE,
#   "r.squared", "R2",        3,    FALSE,
#   "elpd",      "ELPD",      1,    FALSE,
#   "elpd.se",   "ELPD (SE)", 1,    FALSE
# )
# 
# modelsummary(models_policies,
#              estimate = "{estimate}",
#              statistic = "conf.int",
#              gof_map = gm,
#              metrics = c("R2", "LOOIC"))

gof_map <- tribble(
  ~raw,          ~clean,                 ~fmt, ~omit,
  "nobs",        "N",                    0,    FALSE,
  "r.squared",   "\\(R^2\\) (total)",    2,    FALSE,
  "r2.marginal", "\\(R^2\\) (marginal)", 2,    FALSE
)

coef_map <- c(
  "b_derogation_ineffect" = "Derogation in effect",
  "b_new_cases_z" = "New cases (standardized)",
  "b_cumulative_cases_z" = "Cumulative cases (standardized)",
  "b_new_deaths_z" = "New deaths (standardized)",
  "b_cumulative_deaths_z" = "Cumulative deaths (standardized)",
  "b_prior_iccpr_derogationsTRUE" = "Past ICCPR derogation",
  "b_prior_iccpr_other_actionTRUE" = "Past ICCPR action",
  "b_v2x_rule" = "Rule of law",
  "b_v2x_civlib" = "Civil liberties",
  "b_v2xcs_ccsi" = "Core civil society index",
  "b_Intercept" = "Constant",
  "b_Intercept[1]" = "Cut 1",
  "b_Intercept[2]" = "Cut 2",
  "b_Intercept[3]" = "Cut 3",
  "b_year_week_num" = "Year-week",
  "sd_country_name__Intercept" = "Country random effects σ",
  "sd_who_region__Intercept" = "Region random effects σ"
)


# Model definitions
f_policies <- function(panel) {
  BAYES_SEED <- 1757  # From random.org
  
  panel <- panel %>% 
    mutate(across(c(new_cases, new_deaths, cumulative_cases, cumulative_deaths),
                  list(z = ~as.numeric(scale(.)))))
  
  # Use .data[[blah]] for tidy evaluation with strings
  # https://www.tidyverse.org/blog/2019/06/rlang-0-4-0/#other-simple-tidy-evaluation-patterns
  never_filter <- function(x) {
    panel %>% 
      filter(.data[[x]] == 0)
  }
  
  logit_priors <- c(prior(student_t(1, 0, 3), class = Intercept),
                    prior(student_t(1, 0, 3), class = b),
                    prior(cauchy(0, 1), class = sd, lb = 0))
  
  policies_model <- function(y, data) {
    form <- glue::glue(y, " ~ derogation_ineffect + new_cases_z + cumulative_cases_z + ",
                       "new_deaths_z + cumulative_deaths_z + ",
                       "prior_iccpr_derogations + prior_iccpr_other_action + ",
                       "v2x_rule + v2x_civlib + v2xcs_ccsi + ",
                       "year_week_num + (1 | country_name)") %>% 
      as.formula()
    
    # Use rlang::inject() to evaluate the formula object before running the model
    # so that the formula shows up correctly in summary(). 
    # See https://community.rstudio.com/t/tidy-evaluation-and-formulae/4561/17
    rlang::inject(
      brm(
        bf(!!form), 
        data = data,
        family = bernoulli(),
        prior = logit_priors,
        chains = 4, seed = BAYES_SEED,
        threads = threading(2)  # Two CPUs per chain to speed things up
      )
    )
  }
  
  policies_models <- tribble(
    ~nice, ~y, ~never,
    "Cancel Public Events", "c3_cancel_events_bin", "c3_cancel_events_never",
    "Gathering Restrictions", "c4_gatherings_bin", "c4_gatherings_never",
    "Close Public Transit", "c5_public_transport_bin", "c5_public_transport_never",
    "Movement", "c7_internal_movement_bin", "c7_internal_movement_never",
    "International Travel", "c8_intl_travel_bin", "c8_intl_travel_never"
  ) %>% 
    mutate(data = map(never, ~never_filter(.))) %>% 
    mutate(model = map2(y, data, ~policies_model(.x, .y)))
  
  return(policies_models)
}


f_human_rights <- function(panel) {
  BAYES_SEED <- 6440  # From random.org
  
  panel <- panel %>% 
    mutate(across(c(new_cases, new_deaths, cumulative_cases, cumulative_deaths),
                  list(z = ~as.numeric(scale(.)))))
  
  logit_priors <- c(prior(student_t(1, 0, 3), class = Intercept),
                    prior(student_t(1, 0, 3), class = b),
                    prior(cauchy(0, 1), class = sd, lb = 0))
  
  ologit_priors <- c(prior(student_t(1, 0, 3), class = Intercept),
                     prior(student_t(1, 0, 3), class = b),
                     prior(cauchy(0, 1), class = sd, lb = 0))
  
  # It would be neat to include country random effects but there's not enough
  # variation within lots of the countries to pick up any of the effect. When 
  # (1 | country_name) is included, the model predicts a 100% chance of 
  # pandem_discrim == 1 and a 0% chance of pandem_discrim == 0, which is
  # annoying (and it takes 45 minutes to run, ugh)
  #
  # So instead we use region random effects
  human_rights_model <- function(y, family, prior) {
    form <- glue::glue(y, " ~ derogation_ineffect + new_cases_z + cumulative_cases_z + ",
                       "new_deaths_z + cumulative_deaths_z + ", 
                       "prior_iccpr_derogations + prior_iccpr_other_action + ",
                       "v2x_rule + v2x_civlib + v2xcs_ccsi + ",
                       "year_quarter_num + (1 | who_region)") %>% 
      as.formula()
    
    # Use rlang::inject() to evaluate the formula object before running the model
    # so that the formula shows up correctly in summary(). 
    # See https://community.rstudio.com/t/tidy-evaluation-and-formulae/4561/17
    rlang::inject(
      brm(
        bf(!!form), 
        data = panel,
        family = family,
        prior = prior,
        chains = 4, seed = BAYES_SEED,
        threads = threading(2)  # Two CPUs per chain to speed things up
      )
    )
  }
  
  human_rights_models <- tribble(
    ~nice, ~y, ~family, ~prior,
    "Discriminatory Policy", "pandem_discrim", "cumulative", ologit_priors,
    "Non-Derogable Rights", "pandem_ndrights", "bernoulli", logit_priors,
    "No Time Limit Measures", "pandem_nolimit", "cumulative", ologit_priors,
    "Abusive Enforcement", "pandem_abusive", "cumulative", ologit_priors
  ) %>% 
    # Neat pattern for using named list elements in pmap instead of ..1, ..2, etc.
    # https://stackoverflow.com/a/66147672/120898
    # But we can't use that here because of ... issues nested in a function like
    # this. And there are similar issues when using ..1, ..2, etc. when using
    # the formula syntax like ~human_rights_model(..1, ..2, ..3). Everything works fine
    # without the anonymous lambda ~ syntax though
    mutate(model = pmap(lst(y, family, prior), human_rights_model))
  
  return(human_rights_models)
}

`R/funs_plots.R`

Code

# Diagnostic plots

plot_trace <- function(model, params) {
  model %>% 
    tidybayes::gather_draws(!!!syms(params)) %>% 
    ggplot(aes(x = .iteration, y = .value, color = factor(.chain))) +
    geom_line(linewidth = 0.1) +
    scale_color_viridis_d(option = "rocket", end = 0.85) +
    labs(color = "Chain") +
    facet_wrap(vars(.variable), scales = "free_y") +
    theme_pandem()
}

plot_trank <- function(model, params) {
  model %>% 
    tidybayes::gather_draws(!!!syms(params)) %>% 
    group_by(.variable) %>% 
    mutate(draw_rank = rank(.value)) %>% 
    ggplot(aes(x = draw_rank, color = factor(.chain))) +
    stat_bin(geom = "step", binwidth = 200, position = position_identity(), boundary = 0) +
    scale_color_viridis_d(option = "rocket", end = 0.85) +
    labs(color = "Chain") +
    facet_wrap(vars(.variable), scales = "free_y") +
    theme_pandem()
}

plot_pp <- function(model) {
  bayesplot::pp_check(model, ndraws = 100, type = "bars") +
    theme_pandem()
}

fmt_p_inline <- function(x, direction) {
  x <- round(x, 2)
  
  if (direction == "gt") {
    out <- glue::glue(r"[$p(\Delta > 0) = {x}$]")
  } else {
    out <- glue::glue(r"[$p(\Delta < 0) = {x}$]")
  }
  
  return(out)
}


# Storing ggplot objects as rds files is BAD 
#   (https://github.com/hadley/ggplot2-book/issues/344)
#
# But it's also necessary/recommended when working with {targets}---the basic
# walkthrough in the manual even shows how to create a plot as a target
# 
# ggplot objects are strange beasts because they store a copy of the overall
# environment when serialized into rds files. When working with regular-sized
# datasets, this isn't really ever a problem. But when working with tidy
# data frames of MCMC chains with millions and millions of rows, this can create
# rds files that are hundreds or thousands of MBs, which is wild.
#
# This post goes into more details about how to fix it: 
# https://ropensci.org/blog/2022/12/06/save-ggplot2-targets/
#
# Instead of using stat_lineribbon() on tidy MCMC draws like I normally do, it's
# better to collapse the data down first with median_qi() and then use
# geom_lineribbon(). This results in much tinier data frames and plots render
# immediately in .qmd files

library(marginaleffects)

build_policies_plot_data <- function(model_df, year_week_lookup) {
  model_df_preds_mfx <- model_df %>% 
    mutate(pred_data = map(model, ~{
      datagrid(model = .x,
               year_week_num = 1:69,
               derogation_ineffect = 0:1) %>% 
        tidybayes::add_epred_draws(.x) %>% 
        left_join(year_week_lookup, by = "year_week_num") %>% 
        mutate(derogation_ineffect = factor(derogation_ineffect, 
                                            levels = 0:1,
                                            labels = c("No", "Yes"),
                                            ordered = TRUE)) %>% 
        group_by(year_week_day, derogation_ineffect) %>% 
        tidybayes::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
    })) %>% 
    mutate(mfx_data = map(model, ~{
      .x %>% 
        comparisons(newdata = datagrid(year_week_num = 1:69),
                    variables = "derogation_ineffect",
                    type = "response")  %>%  
        posterior_draws() %>% 
        left_join(year_week_lookup, by = "year_week_num") %>% 
        group_by(year_week_day) %>% 
        reframe(post_medians = tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95)),
                p_gt_0 = sum(draw > 0) / n()) %>% 
        unnest(post_medians) %>% 
        rename(draw = y, .lower = ymin, .upper = ymax)
    }))
  
  return(model_df_preds_mfx)
}


build_human_rights_plot_data <- function(model_df, year_quarter_lookup) {
  model_df_preds_mfx <- model_df %>% 
    mutate(pred_data = map2(model, family, ~{
      if (.y == "cumulative") {
        df <- datagrid(model = .x,
                       year_quarter_num = 1:6,
                       derogation_ineffect = 0:1) %>% 
          tidybayes::add_epred_draws(.x) %>% 
          left_join(year_quarter_lookup, by = "year_quarter_num") %>% 
          mutate(derogation_ineffect = factor(derogation_ineffect, 
                                              levels = 0:1,
                                              labels = c("No derogation", "Derogation in effect"),
                                              ordered = TRUE)) %>% 
          group_by(year_quarter_day, year_quarter, .category, derogation_ineffect) %>% 
          tidybayes::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
      } else {
        df <- datagrid(model = .x,
                       year_quarter_num = 1:6,
                       derogation_ineffect = 0:1) %>% 
          tidybayes::add_epred_draws(.x) %>% 
          left_join(year_quarter_lookup, by = "year_quarter_num") %>% 
          mutate(derogation_ineffect = factor(derogation_ineffect, 
                                              levels = 0:1,
                                              labels = c("No", "Yes"),
                                              ordered = TRUE)) %>% 
          group_by(year_quarter_day, year_quarter, derogation_ineffect) %>% 
          tidybayes::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
      }
    })) %>% 
    mutate(mfx_data = pmap(list(model, family, y), \(.model, .family, .y) {
      mfx <- .model %>% 
        comparisons(newdata = datagrid(year_quarter_num = 1:6),
                    variables = "derogation_ineffect",
                    type = "response")  %>%  
        posterior_draws() %>% 
        left_join(year_quarter_lookup, by = "year_quarter_num")
      
      if (.family == "cumulative") {
        mfx <- mfx %>% 
          mutate(group = factor(group, levels = levels(.model$data[[.y]]), ordered = TRUE)) %>% 
          group_by(year_quarter_day, year_quarter, group) %>% 
          reframe(post_medians = tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95)),
                  p_gt_0 = sum(draw > 0) / n()) %>% 
          unnest(post_medians) %>% 
          rename(draw = y, .lower = ymin, .upper = ymax)
      } else {
        mfx <- mfx %>% 
          group_by(year_quarter_day, year_quarter) %>% 
          reframe(post_medians = tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95)),
                  p_gt_0 = sum(draw > 0) / n()) %>% 
          unnest(post_medians) %>% 
          rename(draw = y, .lower = ymin, .upper = ymax) %>% 
          mutate(group = "Logit")
      }
      
      mfx
    }))
  
  return(model_df_preds_mfx)
}

build_policies_table_data <- function(model_df) {
  policies_pred_tbl <- model_df %>% 
    unnest(pred_data) %>% 
    filter(.width == 0.95) %>% 
    select(nice, year_week_day, derogation_ineffect, .epred, .lower, .upper) %>% 
    group_by(nice, derogation_ineffect) %>% 
    mutate(rank = dense_rank(.epred)) %>% 
    filter(rank == 1 | rank == max(rank)) %>% 
    mutate(rank = ifelse(rank == 1, "min", "max")) %>% 
    group_by(rank, derogation_ineffect) %>% 
    mutate(outcome = janitor::make_clean_names(nice)) %>%
    rename(draw = .epred, min = .lower, max = .upper) %>% 
    mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0)))) %>% 
    ungroup()

  policies_mfx_tbl <- model_df %>% 
    unnest(mfx_data) %>% 
    filter(.width == 0.95) %>% 
    select(nice, year_week_day, draw, .lower, .upper, p_gt_0, .width) %>% 
    group_by(nice) %>% 
    mutate(rank = dense_rank(draw)) %>% 
    filter(rank == 1 | rank == max(rank)) %>% 
    mutate(rank = ifelse(rank == 1, "min", "max")) %>% 
    group_by(rank) %>% 
    mutate(outcome = janitor::make_clean_names(nice)) %>%
    rename(min = .lower, max = .upper) %>% 
    mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0))),
           p_lt_0 = p_gt_0 - 1) %>% 
    mutate(p_gt = fmt_p_inline(p_gt_0, "gt"),
           p_lt = fmt_p_inline(p_lt_0, "lt")) %>% 
    ungroup()
  
  return(lst(policies_pred_tbl, policies_mfx_tbl))
}

build_hr_table_data <- function(model_df) {
  hr_pred_tbl <- model_df %>% 
    mutate(pred_data = map(pred_data, ~{
      .x %>% 
        mutate(derogation_ineffect = as.character(derogation_ineffect),
               derogation_ineffect = recode(derogation_ineffect,
                                            "No derogation" = "No",
                                            "Derogation in effect" = "Yes"))
    })) %>% 
    unnest(pred_data) %>% 
    filter(.width == 0.95) %>% 
    select(nice, year_quarter_day, derogation_ineffect, .category, .epred, .lower, .upper) %>% 
    group_by(nice, derogation_ineffect, .category) %>% 
    mutate(rank = dense_rank(.epred)) %>% 
    filter(rank == 1 | rank == max(rank)) %>% 
    mutate(rank = ifelse(rank == 1, "min", "max")) %>% 
    group_by(rank, derogation_ineffect, .category) %>% 
    mutate(outcome = janitor::make_clean_names(nice)) %>%
    rename(draw = .epred, min = .lower, max = .upper) %>% 
    mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0)))) %>% 
    ungroup() %>% 
    mutate(category = ifelse(is.na(.category), "NA", as.character(.category)))
  
  hr_mfx_tbl <- model_df %>% 
    mutate(mfx_data = map(mfx_data, ~{
      .x %>% 
        mutate(group = as.character(group))
    })) %>% 
    unnest(mfx_data) %>% 
    filter(.width == 0.95) %>% 
    select(nice, year_quarter_day, group, draw, .lower, .upper, p_gt_0, .width) %>% 
    group_by(nice, group) %>% 
    mutate(rank = dense_rank(draw)) %>% 
    filter(rank == 1 | rank == max(rank)) %>% 
    mutate(rank = ifelse(rank == 1, "min", "max")) %>% 
    group_by(rank, group) %>% 
    mutate(outcome = janitor::make_clean_names(nice)) %>%
    rename(min = .lower, max = .upper) %>% 
    mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0))),
           p_lt_0 = 1 - p_gt_0) %>% 
    mutate(p_gt = fmt_p_inline(p_gt_0, "gt"),
           p_lt = fmt_p_inline(p_lt_0, "lt")) %>% 
    ungroup()
  
  return(lst(hr_pred_tbl, hr_mfx_tbl))
}