targets pipeline

We use the magical targets package to run our analysis and keep track of all dependencies automatically.

To build our entire project, run targets::tar_make() at the R console.

Here’s our complete pipeline:

Actual code

All the data processing is handled with dataset-specific functions that live in R/funs_data-cleaning.R, which targets then runs as needed. For the sake of transparency, here’s that code:

R/funs_data-cleaning.R
Code
library(readxl)
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(clock))
library(countrycode)
library(jsonlite)
suppressPackageStartupMessages(library(sf))

clean_iccpr_who <- function(path) {
  who_regions <- tribble(
    ~who_region, ~who_region_long,
    "AFRO", "Regional Office for Africa",
    "AMRO", "Regional Office for the Americas",
    "SEARO", "Regional Office for South-East Asia",
    "EURO", "Regional Office for Europe",
    "EMRO", "Regional Office for the Eastern Mediterranean",
    "WPRO", "Regional Office for the Western Pacific"
  )
  
  x <- read_excel(path) %>% 
    janitor::clean_names() %>% 
    # Make this a date instead of PosixCT
    mutate(date_reported = as.Date(date_reported)) %>% 
    # All NAs here are actually 0s
    replace_na(list(iccpr_derogation_filed = 0,
                    derogation_start = 0,
                    derogation_ineffect = 0,
                    derogation_end = 0)) %>% 
    # Country names and codes fun times
    mutate(
      country_name = countrycode(
        country_code, origin = "iso2c", destination = "country.name",
        custom_match = c("XK" = "Kosovo", "TR" = "Türkiye")),
      iso3 = countrycode(
        country_code, origin = "iso2c", destination = "iso3c",
        custom_match = c("XK" = "XKX")
      )
    ) %>% 
    left_join(who_regions, by = "who_region") %>% 
    # Final column order
    select(-c(country_code, country, cow_code)) %>% 
    select(country_name, iso3, who_region, who_region_long,
           day = date_reported, everything())
  
  return(x)
}

clean_iccpr_action <- function(path) {
  x <- read_excel(path) %>% 
    janitor::clean_names() %>% 
    rename(prior_iccpr_other_action = other_prior_iccpr_post_commitment_treaty_actions,
           country_name = country) %>% 
    mutate(across(starts_with("prior_"), ~case_when(
      . == 1 ~ TRUE,
      is.na(.) ~ FALSE
    )))
  
  return(x)
}

clean_oxford <- function(path) {
  x <- tibble(
    # Get a list of all the sheets in the Excel file
    index_name = excel_sheets(path)
  ) %>% 
    # Read each sheet
    mutate(data = map(index_name, ~read_excel(path, sheet = .x))) %>% 
    # Standardize the index name based on the sheet name
    mutate(index_name = janitor::make_clean_names(index_name)) %>% 
    # Make each data frame cell in the list column long and a little cleaner
    mutate(clean = map(data, ~{
      .x %>% 
        pivot_longer(cols = -c(country_code, country_name),
                     names_to = "day", values_to = "value") %>% 
        mutate(day = dmy(day))
    })) %>% 
    # Get rid of the original wide data frame and unnest the long clean data
    select(-data) %>% 
    unnest(clean) %>% 
    # Make data wide so that there's a column for each index and row for each
    # country-day
    pivot_wider(names_from = "index_name", values_from = "value") %>% 
    # Country names and codes fun times
    mutate(iso3 = recode(country_code, "RKS" = "XKX")) %>% 
    mutate(country_name = countrycode(
      iso3, origin = "iso3c", destination = "country.name",
      custom_match = c("XKX" = "Kosovo", "TUR" = "Türkiye")
    )) %>% 
    # Get rid of countries with all missing data
    group_by(country_name) %>% 
    filter(!all(is.na(stringency_index))) %>% 
    ungroup() %>% 
    # Shorten these long column names
    rename(
      c3_cancel_events = c3_cancel_public_events, 
      c4_gatherings = c4_restrictions_on_gaterings,
      c5_public_transport = c5_close_public_transport, 
      c6_stay_at_home = c6_stay_at_home_requirements,
      c7_internal_movement = c7_movement_restrictions_intern, 
      c8_intl_travel = c8_international_travel_control
    ) %>% 
    # Make binary versions of these columns
    mutate(across(c(c3_cancel_events, c4_gatherings,
                    c5_public_transport, c6_stay_at_home,
                    c7_internal_movement, c8_intl_travel,
                    e1_income_support, e2_debt_relief),
                  list(bin = ~ ifelse(. > 0, 1, 0)))) %>% 
    # Create indicators for whether policies were added, removed, or never changed
    group_by(country_name) %>% 
    mutate(across(c(c3_cancel_events, c4_gatherings,
                    c5_public_transport, c6_stay_at_home,
                    c7_internal_movement, c8_intl_travel,
                    e1_income_support, e2_debt_relief),
                  list(added = ~any(c(NA, diff(.)) >= 1, na.rm = TRUE),
                       removed = ~any(c(NA, diff(.)) <= -1, na.rm = TRUE),
                       never = ~all(c(NA, diff(.)) == 0, na.rm = TRUE)))) %>% 
    ungroup() %>% 
    # Final column order
    select(-country_code) %>% 
    select(country_name, iso3, day, everything())
  
  return(x)
}

clean_pandem <- function(path) {
  pandem_raw <- read_excel(path)
  
  pandem_levels <- c("None" = "0", "Minor" = "1", "Moderate" = "2", "Major" = "3")
  
  pandem_clean <- pandem_raw %>% 
    mutate(quarter_numeric = parse_number(quarter) / 10) %>% 
    mutate(year_quarter = year + quarter_numeric) %>% 
    mutate(iso3 = countrycode(country_name, 
                              origin = "country.name", 
                              destination = "iso3c"),
           country_name = countrycode(iso3, origin = "iso3c", 
                                      destination = "country.name",
                                      custom_match = c("TUR" = "Türkiye"))) %>% 
    select(country_name, iso3, year, year_quarter, 
           pandem, panback, 
           pandem_discrim = type1, 
           pandem_ndrights = type2,
           pandem_abusive = type3,
           pandem_nolimit = type4,
           pandem_media = type7) %>% 
    # Make these 0-3 columns factors
    mutate(across(starts_with("pandem_"), ~factor(., levels = pandem_levels, ordered = TRUE))) %>% 
    # Add labels
    mutate(across(starts_with("pandem_"), ~fct_recode(., !!!pandem_levels))) %>% 
    # Drop unused levels
    mutate(across(starts_with("pandem_"), ~fct_drop(.)))
  
  return(pandem_clean)
}

clean_vdem <- function(path) {
  vdem_raw <- read_rds(path) %>% as_tibble()
  
  vdem_clean <- vdem_raw %>% 
    filter(year >= 2020) %>% 
    mutate(country_name = countrycode(
      country_text_id, origin = "iso3c", destination = "country.name",
      custom_match = c("XKX" = "Kosovo", "ZZB" = "Zanzibar", 
                       "PSG" = "Palestine (Gaza)", "SML" = "Somaliland", 
                       "TUR" = "Türkiye")
    )) %>% 
    select(country_name, iso3 = country_text_id, year,
           
           # Civil society stuff
           v2csreprss,  # CSO repression
           v2xcs_ccsi,  # Core civil society index (entry/exit, repression, participatory env)
           
           # Human rights and politics
           # Political corruption index (less to more, 0-1) (public sector +
           # executive + legislative + judicial corruption)
           v2x_corr,
           v2x_rule,  # Rule of law index
           
           # Rights indexes
           v2x_civlib,  # Civil liberties index
           v2x_clphy,  # Physical violence index
           v2x_clpriv,  # Private civil liberties index
           v2x_clpol,  # Political civil liberties index
           
           # Democracy
           v2x_polyarchy, v2x_libdem, v2x_regime_amb
    )
  
  return(vdem_clean)
}

create_daily_skeleton <- function(iccpr_who, oxford, pandem, vdem) {
  all_countries <- list(unique(iccpr_who$iso3), 
                        unique(oxford$iso3), 
                        unique(pandem$iso3),
                        unique(vdem$iso3))
  
  countries_in_all_data <- reduce(all_countries, intersect)
  
  # first_day <- min(oxford$day)
  first_day <- ymd("2020-03-11")
  last_day <- max(oxford$day)
  
  daily_skeleton <- expand_grid(
    iso3 = countries_in_all_data,
    day = seq(first_day, last_day, by = "1 day")
  ) %>% 
    mutate(year = year(day),
           year_quarter = quarter(day, type = "year.quarter"),
           day_num = as.numeric(day) - as.numeric(ymd("2020-03-10")),
           year_week = calendar_narrow(as_iso_year_week_day(day), "week"),
           year_week_day = as_year_month_day(calendar_narrow(set_day(year_week, 1), "day"))) %>% 
    # Force the year_week column to be text since it's a weird {clock} class
    mutate(year_week = as.character(year_week),
           year_week_day = as.character(year_week_day)) %>% 
    # Pandem starts Q2 2020 on March 11 instead of April 1
    mutate(year_quarter = ifelse(year_quarter == 2020.1, 2020.2, year_quarter)) %>% 
    mutate(country_name = countrycode(
      iso3, origin = "iso3c", destination = "country.name",
      custom_match = c("XKX" = "Kosovo", "TUR" = "Türkiye")
    )) %>% 
    select(country_name, iso3, day, day_num, year, year_quarter, year_week, year_week_day)
  
  return(daily_skeleton)
}

make_final_data <- function(skeleton, iccpr_who, iccpr_action, oxford, pandem, vdem) {
  daily_final <- skeleton %>% 
    left_join(select(iccpr_who, -country_name), by = c("iso3", "day")) %>% 
    left_join(select(iccpr_action, -country_name), by = c("iso3")) %>% 
    left_join(select(oxford, -country_name), by = c("iso3", "day")) %>% 
    left_join(select(pandem, -c(country_name, year)), by = c("iso3", "year_quarter")) %>% 
    left_join(select(vdem, -country_name), by = c("iso3", "year"))
  
  return(daily_final)
}

make_weekly_data <- function(daily_final) {
  weekly_final <- daily_final %>% 
    group_by(year_week, year_week_day, country_name, iso3, who_region, who_region_long, 
             prior_iccpr_derogations, prior_iccpr_other_action) %>% 
    summarize(across(c(new_cases, new_deaths), ~sum(., na.rm = TRUE)),
              across(c(iccpr_derogation_filed, derogation_start, 
                       derogation_ineffect, derogation_end), ~max(., na.rm = TRUE)),
              across(matches("[ce]\\d_"), ~max(., na.rm = TRUE)),
              across(c(pandem, panback, starts_with("pandem_"), starts_with("v2")), ~max(., na.rm = TRUE))) %>% 
    group_by(country_name) %>% 
    mutate(cumulative_cases = cumsum(new_cases),
           cumulative_deaths = cumsum(new_deaths)) %>% 
    mutate(year_week_num = 1:n(), .after = "year_week") %>% 
    ungroup() %>% 
    arrange(country_name)
  
  return(weekly_final)
}

make_year_week_lookup <- function(weekly_final) {
  year_week_lookup <- weekly_final %>% 
    distinct(year_week, year_week_num, year_week_day) %>% 
    mutate(year_week_day = ymd(year_week_day))
  
  return(year_week_lookup)
}

load_world_map <- function(path) {
  world_map <- read_sf(path) %>%
    filter(ISO_A3 != "ATA")
  
  return(world_map)
}

make_derogation_count <- function(data) {
  new_derogations <- data %>% 
    group_by(iso3) %>% 
    summarize(derogations = sum(iccpr_derogation_filed))
  
  return(new_derogations)
}

make_derogation_map_data <- function(derogation_count, map) {
  map_with_derogations <- map %>%
    # Fix some Natural Earth ISO weirdness
    mutate(ISO_A3 = ifelse(ISO_A3 == "-99", as.character(ISO_A3_EH), as.character(ISO_A3))) %>%
    mutate(ISO_A3 = case_when(
      .$ISO_A3 == "GRL" ~ "DNK",
      .$NAME == "Norway" ~ "NOR",
      .$NAME == "Kosovo" ~ "XKK",
      TRUE ~ ISO_A3
    )) %>%
    left_join(derogation_count, by = join_by(ISO_A3 == iso3)) %>% 
    mutate(derogations_1plus = ifelse(derogations == 0, NA, derogations))
  
  return(map_with_derogations)
}

# When using a file-based target, {targets} requires that the function that
# saves the file returns a path to the file. write_csv() and write_dta() both
# invisibly return the data frame being written, and saveRDS() returns NULL, so
# we need some wrapper functions to save the files and return the paths.
save_csv <- function(df, path) {
  readr::write_csv(df, path)
  return(path)
}

save_r <- function(df, path) {
  saveRDS(df, path)
  return(path)
}

save_dta <- function(df, path) {
  haven::write_dta(df, path)
  return(path)
}
R/funs_graphics.R
Code
clrs <- MetBrewer::met.brewer("Tiepolo")

set_annotation_fonts <- function() {
  ggplot2::update_geom_defaults("label", list(family = "Noto Sans", face = "plain"))
  ggplot2::update_geom_defaults("text", list(family = "Noto Sans", face = "plain"))
}

theme_pandem <- function(base_size = 11, base_family = "Noto Sans", prior = FALSE) {
  ret <- theme_bw(base_size, base_family) +
    theme(panel.background = element_rect(fill = "#ffffff", colour = NA),
          title = element_text(size = rel(1), family = "Noto Sans", face = "bold"),
          plot.subtitle = element_text(size = rel(0.8),
                                       family = "Noto Sans", face = "plain"),
          plot.caption = element_text(margin = margin(t = 10), size = rel(0.6),
                                      family = "Noto Sans", face = "plain"),
          panel.border = element_rect(color = "grey50", fill = NA, linewidth = 0.15),
          panel.spacing = unit(1, "lines"),
          panel.grid.minor = element_blank(),
          panel.grid.major = element_line(linewidth = 0.25, colour = "grey90"),
          axis.line = element_blank(),
          axis.ticks = element_blank(),
          axis.title = element_text(size = rel(0.8),
                                    family = "Noto Sans", face = "bold"),
          axis.title.x = element_text(hjust = 0, margin = margin(t = 10)),
          axis.title.y = element_text(hjust = 1, margin = margin(r = 10)),
          legend.position = "bottom",
          legend.title = element_text(size = rel(0.7), vjust = 0.5,
                                      family = "Noto Sans", face = "plain"),
          legend.key.size = unit(0.7, "line"),
          legend.key = element_blank(),
          legend.spacing = unit(0.1, "lines"),
          legend.justification = "left",
          legend.margin = margin(t = -5, b = 0, l = 0, r = 0),
          strip.text = element_text(size = rel(0.9), hjust = 0,
                                    family = "Noto Sans", face = "bold"),
          strip.background = element_rect(fill = "white", colour = NA))
  
  if (prior) {
    ret <- ret +
      theme(panel.grid.major = element_blank(),
            axis.title.y = element_blank(),
            axis.text.y = element_blank(),
            panel.border = element_blank())
  } else {
    ret
  }
}
R/funs_models.R
Code
# Running modelsummary() on Bayesian models takes a while because of all the
# calculations involved in creating the GOF statistics. With modelsummary 0.7+,
# though it's now possible to build the base model with modelsummary(..., output
# = "modelsummary_list"), save that as an intermediate object, and then feed it
# through modelsummary() again with whatever other output you want. The
# modelsummary_list-based object thus acts like an output-agnostic ur-model.

build_modelsummary <- function(model_df) {
  msl <- model_df %>% 
    pull(model, name = nice) %>% 
    modelsummary::modelsummary(output = "modelsummary_list",
                               statistic = "[{conf.low}, {conf.high}]",
                               ci_method = "hdi",
                               metrics = c("R2"))
  return(msl)
}

# This is how to get other stats like ELPD (using metrics = "LOOIC" is required
# for that; metrics = "R2" provides nobs). I would do this for all the summary
# tables, but for whatever reason, it takes forever to calculate ELPD/LOO stuff
# on ordered logit models, so we just show N instead
#
# gm <- tribble(
#   ~raw,        ~clean,      ~fmt, ~omit,
#   "nobs",      "N",         0,    FALSE,
#   "r.squared", "R2",        3,    FALSE,
#   "elpd",      "ELPD",      1,    FALSE,
#   "elpd.se",   "ELPD (SE)", 1,    FALSE
# )
# 
# modelsummary(models_policies,
#              estimate = "{estimate}",
#              statistic = "conf.int",
#              gof_map = gm,
#              metrics = c("R2", "LOOIC"))

gof_map <- tribble(
  ~raw,          ~clean,                 ~fmt, ~omit,
  "nobs",        "N",                    0,    FALSE,
  "r.squared",   "\\(R^2\\) (total)",    2,    FALSE,
  "r2.marginal", "\\(R^2\\) (marginal)", 2,    FALSE
)

coef_map <- c(
  "b_derogation_ineffect" = "Derogation in effect",
  "b_new_cases_z" = "New cases (standardized)",
  "b_cumulative_cases_z" = "Cumulative cases (standardized)",
  "b_new_deaths_z" = "New deaths (standardized)",
  "b_cumulative_deaths_z" = "Cumulative deaths (standardized)",
  "b_prior_iccpr_derogationsTRUE" = "Past ICCPR derogation",
  "b_prior_iccpr_other_actionTRUE" = "Past ICCPR action",
  "b_v2x_rule" = "Rule of law",
  "b_v2x_civlib" = "Civil liberties",
  "b_v2xcs_ccsi" = "Core civil society index",
  "b_Intercept" = "Constant",
  "b_Intercept[1]" = "Cut 1",
  "b_Intercept[2]" = "Cut 2",
  "b_Intercept[3]" = "Cut 3",
  "b_year_week_num" = "Year-week",
  "sd_country_name__Intercept" = "Country random effects σ",
  "sd_who_region__Intercept" = "Region random effects σ"
)


# Model definitions
f_policies <- function(panel) {
  BAYES_SEED <- 1757  # From random.org
  
  panel <- panel %>% 
    mutate(across(c(new_cases, new_deaths, cumulative_cases, cumulative_deaths),
                  list(z = ~as.numeric(scale(.)))))
  
  # Use .data[[blah]] for tidy evaluation with strings
  # https://www.tidyverse.org/blog/2019/06/rlang-0-4-0/#other-simple-tidy-evaluation-patterns
  never_filter <- function(x) {
    panel %>% 
      filter(.data[[x]] == 0)
  }
  
  logit_priors <- c(prior(student_t(1, 0, 3), class = Intercept),
                    prior(student_t(1, 0, 3), class = b),
                    prior(cauchy(0, 1), class = sd, lb = 0))
  
  policies_model <- function(y, data) {
    form <- glue::glue(y, " ~ derogation_ineffect + new_cases_z + cumulative_cases_z + ",
                       "new_deaths_z + cumulative_deaths_z + ",
                       "prior_iccpr_derogations + prior_iccpr_other_action + ",
                       "v2x_rule + v2x_civlib + v2xcs_ccsi + ",
                       "year_week_num + (1 | country_name)") %>% 
      as.formula()
    
    # Use rlang::inject() to evaluate the formula object before running the model
    # so that the formula shows up correctly in summary(). 
    # See https://community.rstudio.com/t/tidy-evaluation-and-formulae/4561/17
    rlang::inject(
      brm(
        bf(!!form), 
        data = data,
        family = bernoulli(),
        prior = logit_priors,
        chains = 4, seed = BAYES_SEED,
        threads = threading(2)  # Two CPUs per chain to speed things up
      )
    )
  }
  
  policies_models <- tribble(
    ~nice, ~y, ~never,
    "Cancel Public Events", "c3_cancel_events_bin", "c3_cancel_events_never",
    "Gathering Restrictions", "c4_gatherings_bin", "c4_gatherings_never",
    "Close Public Transit", "c5_public_transport_bin", "c5_public_transport_never",
    "Movement", "c7_internal_movement_bin", "c7_internal_movement_never",
    "International Travel", "c8_intl_travel_bin", "c8_intl_travel_never"
  ) %>% 
    mutate(data = map(never, ~never_filter(.))) %>% 
    mutate(model = map2(y, data, ~policies_model(.x, .y)))
  
  return(policies_models)
}


f_human_rights <- function(panel) {
  BAYES_SEED <- 6440  # From random.org
  
  panel <- panel %>% 
    mutate(across(c(new_cases, new_deaths, cumulative_cases, cumulative_deaths),
                  list(z = ~as.numeric(scale(.)))))
  
  logit_priors <- c(prior(student_t(1, 0, 3), class = Intercept),
                    prior(student_t(1, 0, 3), class = b),
                    prior(cauchy(0, 1), class = sd, lb = 0))
  
  ologit_priors <- c(prior(student_t(1, 0, 3), class = Intercept),
                     prior(student_t(1, 0, 3), class = b),
                     prior(cauchy(0, 1), class = sd, lb = 0))
  
  # It would be neat to include country random effects but there's not enough
  # variation within lots of the countries to pick up any of the effect. When 
  # (1 | country_name) is included, the model predicts a 100% chance of 
  # pandem_discrim == 1 and a 0% chance of pandem_discrim == 0, which is
  # annoying (and it takes 45 minutes to run, ugh)
  
  # So instead we use region random effects instead? That only takes 10 minutes.
  # Or no random effects at all?
  human_rights_model <- function(y, family, prior) {
    form <- glue::glue(y, " ~ derogation_ineffect + new_cases_z + cumulative_cases_z + ",
                       "new_deaths_z + cumulative_deaths_z + ", 
                       "prior_iccpr_derogations + prior_iccpr_other_action + ",
                       "v2x_rule + v2x_civlib + v2xcs_ccsi + ",
                       "year_week_num + (1 | who_region)") %>% 
      as.formula()
    
    # Use rlang::inject() to evaluate the formula object before running the model
    # so that the formula shows up correctly in summary(). 
    # See https://community.rstudio.com/t/tidy-evaluation-and-formulae/4561/17
    rlang::inject(
      brm(
        bf(!!form), 
        data = panel,
        family = family,
        prior = prior,
        chains = 4, seed = BAYES_SEED,
        threads = threading(2)  # Two CPUs per chain to speed things up
      )
    )
  }
  
  human_rights_models <- tribble(
    ~nice, ~y, ~family, ~prior,
    "Discriminatory Policy", "pandem_discrim", "cumulative", ologit_priors,
    "Non-Derogable Rights", "pandem_ndrights", "bernoulli", logit_priors,
    "No Time Limit Measures", "pandem_nolimit", "cumulative", ologit_priors,
    "Abusive Enforcement", "pandem_abusive", "cumulative", ologit_priors
  ) %>% 
    # Neat pattern for using named list elements in pmap instead of ..1, ..2, etc.
    # https://stackoverflow.com/a/66147672/120898
    # But we can't use that here because of ... issues nested in a function like
    # this. And there are similar issues when using ..1, ..2, etc. when using
    # the formula syntax like ~human_rights_model(..1, ..2, ..3). Everything works fine
    # without the anonymous lambda ~ syntax though
    mutate(model = pmap(lst(y, family, prior), human_rights_model))
  
  return(human_rights_models)
}
R/funs_plots.R
Code
# Diagnostic plots

plot_trace <- function(model, params) {
  model %>% 
    tidybayes::gather_draws(!!!syms(params)) %>% 
    ggplot(aes(x = .iteration, y = .value, color = factor(.chain))) +
    geom_line(linewidth = 0.1) +
    scale_color_viridis_d(option = "rocket", end = 0.85) +
    labs(color = "Chain") +
    facet_wrap(vars(.variable), scales = "free_y") +
    theme_pandem()
}

plot_trank <- function(model, params) {
  model %>% 
    tidybayes::gather_draws(!!!syms(params)) %>% 
    group_by(.variable) %>% 
    mutate(draw_rank = rank(.value)) %>% 
    ggplot(aes(x = draw_rank, color = factor(.chain))) +
    stat_bin(geom = "step", binwidth = 200, position = position_identity(), boundary = 0) +
    scale_color_viridis_d(option = "rocket", end = 0.85) +
    labs(color = "Chain") +
    facet_wrap(vars(.variable), scales = "free_y") +
    theme_pandem()
}

plot_pp <- function(model) {
  bayesplot::pp_check(model, ndraws = 100, type = "bars") +
    theme_pandem()
}


# Storing ggplot objects as rds files is BAD 
#   (https://github.com/hadley/ggplot2-book/issues/344)
#
# But it's also necessary/recommended when working with {targets}---the basic
# walkthrough in the manual even shows how to create a plot as a target
# 
# ggplot objects are strange beasts because they store a copy of the overall
# environment when serialized into rds files. When working with regular-sized
# datasets, this isn't really ever a problem. But when working with tidy
# data frames of MCMC chains with millions and millions of rows, this can create
# rds files that are hundreds or thousands of MBs, which is wild.
#
# This post goes into more details about how to fix it: 
# https://ropensci.org/blog/2022/12/06/save-ggplot2-targets/
#
# Instead of using stat_lineribbon() on tidy MCMC draws like I normally do, it's
# better to collapse the data down first with median_qi() and then use
# geom_lineribbon(). This results in much tinier data frames and plots render
# immediately in .qmd files

library(marginaleffects)

build_policies_plot_data <- function(model_df, year_week_lookup) {
  model_df_preds_mfx <- model_df %>% 
    mutate(pred_data = map(model, ~{
      datagrid(model = .x,
               year_week_num = 1:69,
               derogation_ineffect = 0:1) %>% 
        tidybayes::add_epred_draws(.x) %>% 
        left_join(year_week_lookup, by = "year_week_num") %>% 
        mutate(derogation_ineffect = factor(derogation_ineffect, 
                                            levels = 0:1,
                                            labels = c("No", "Yes"),
                                            ordered = TRUE)) %>% 
        group_by(year_week_day, derogation_ineffect) %>% 
        tidybayes::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
    })) %>% 
    mutate(mfx_data = map(model, ~{
      .x %>% 
        comparisons(newdata = datagrid(year_week_num = 1:69),
                    variables = "derogation_ineffect",
                    type = "response")  %>%  
        posteriordraws() %>% 
        left_join(year_week_lookup, by = "year_week_num") %>% 
        group_by(year_week_day) %>% 
        tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95))
    }))
  
  return(model_df_preds_mfx)
}


build_human_rights_plot_data <- function(model_df, year_week_lookup) {
  model_df_preds_mfx <- model_df %>% 
    mutate(pred_data = map2(model, family, ~{
      if (.y == "cumulative") {
        df <- datagrid(model = .x,
                       year_week_num = 1:69,
                       derogation_ineffect = 0:1) %>% 
          tidybayes::add_epred_draws(.x) %>% 
          left_join(year_week_lookup, by = "year_week_num") %>% 
          mutate(derogation_ineffect = factor(derogation_ineffect, 
                                              levels = 0:1,
                                              labels = c("No derogation", "Derogation in effect"),
                                              ordered = TRUE)) %>% 
          group_by(year_week_day, .category, derogation_ineffect) %>% 
          tidybayes::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
      } else {
        df <- datagrid(model = .x,
                       year_week_num = 1:69,
                       derogation_ineffect = 0:1) %>% 
          tidybayes::add_epred_draws(.x) %>% 
          left_join(year_week_lookup, by = "year_week_num") %>% 
          mutate(derogation_ineffect = factor(derogation_ineffect, 
                                              levels = 0:1,
                                              labels = c("No", "Yes"),
                                              ordered = TRUE)) %>% 
          group_by(year_week_day, derogation_ineffect) %>% 
          tidybayes::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
      }
    })) %>% 
    mutate(mfx_data = pmap(list(model, family, y), \(.model, .family, .y) {
      mfx <- .model %>% 
        comparisons(newdata = datagrid(year_week_num = 1:69),
                    variables = "derogation_ineffect",
                    type = "response")  %>%  
        posteriordraws() %>% 
        left_join(year_week_lookup, by = "year_week_num") 
      
      if (.family == "cumulative") {
        mfx <- mfx %>% 
          mutate(group = factor(group, levels = levels(.model$data[[.y]]), ordered = TRUE)) %>% 
          group_by(year_week_day, group) %>% 
          tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95))
      } else {
        mfx <- mfx %>% 
          group_by(year_week_day) %>% 
          tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95))
      }
      
      mfx
    }))
  
  return(model_df_preds_mfx)
}