Targets pipeline
targets pipeline
We use the magical {targets} package to run our analysis and keep track of all dependencies automatically.
To build our entire project, run targets::tar_make()
at the R console.
Here’s our complete pipeline:
Actual code
All the data processing is handled with dataset-specific functions that live in R/funs_data-cleaning.R
, which targets
then runs as needed. For the sake of transparency, here’s that code:
R/funs_data-cleaning.R
Code
library(readxl)
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(clock))
library(countrycode)
library(jsonlite)
suppressPackageStartupMessages(library(sf))
<- function(path, subregions_path) {
clean_iccpr_who <- tribble(
who_regions ~who_region, ~who_region_long,
"AFRO", "Regional Office for Africa",
"AMRO", "Regional Office for the Americas",
"SEARO", "Regional Office for South-East Asia",
"EURO", "Regional Office for Europe",
"EMRO", "Regional Office for the Eastern Mediterranean",
"WPRO", "Regional Office for the Western Pacific"
)
<- tribble(
who_subregions_manual ~member_states, ~region_abbr, ~stratum, ~subregion_imputed,
"American Samoa", "WPR", "B", TRUE,
"Anguilla", "AMR", "B", TRUE,
"Aruba", "AMR", "B", TRUE,
"Bermuda", "AMR", "B", TRUE,
"British Virgin Islands", "AMR", "B", TRUE,
"Cayman Islands", "AMR", "B", TRUE,
"Curaçao", "AMR", "B", TRUE,
"French Guiana", "AMR", "B", TRUE,
"French Polynesia", "WPR", "B", TRUE,
"Kosovo", "EUR", "B", TRUE,
"Liechtenstein", "EUR", "A", TRUE,
"Palestinian Territories", "EMR", "B", TRUE,
"St. Barthélemy", "AMR", "B", TRUE,
"St. Helena", "AFR", "E", TRUE,
"St. Martin (French part)", "AMR", "B", TRUE,
"Sint Maarten", "AMR", "B", TRUE,
"Turks & Caicos Islands", "AMR", "B", TRUE
)
<- read_excel(subregions_path) %>%
who_subregions set_names(c("subregions", "member_states")) %>%
separate(subregions, into = c("region_abbr", "stratum"), sep = " ") %>%
mutate(member_states = str_split(member_states, "; ")) %>%
unnest(member_states) %>%
bind_rows(who_subregions_manual) %>%
mutate(
who_region = paste0(region_abbr, "O"),
who_subregion = paste0(region_abbr, stratum)
%>%
) replace_na(list(subregion_imputed = FALSE)) %>%
mutate(
country_name = countrycode(
origin = "country.name", destination = "country.name",
member_states, custom_match = c("Turkey" = "Türkiye")),
iso3 = countrycode(
origin = "country.name", destination = "iso3c",
country_name, custom_match = c("Kosovo" = "XKX"))
%>%
) select(iso3, who_subregion, subregion_imputed)
<- read_excel(path) %>%
x ::clean_names() %>%
janitor# Make this a date instead of PosixCT
mutate(date_reported = as.Date(date_reported)) %>%
# All NAs here are actually 0s
replace_na(list(iccpr_derogation_filed = 0,
derogation_start = 0,
derogation_ineffect = 0,
derogation_end = 0)) %>%
# Country names and codes fun times
mutate(
country_name = countrycode(
origin = "iso2c", destination = "country.name",
country_code, custom_match = c("XK" = "Kosovo", "TR" = "Türkiye")),
iso3 = countrycode(
origin = "iso2c", destination = "iso3c",
country_code, custom_match = c("XK" = "XKX")
)%>%
) left_join(who_regions, by = "who_region") %>%
left_join(who_subregions, by = "iso3") %>%
# Final column order
select(-c(country_code, country, cow_code)) %>%
select(country_name, iso3, who_region, who_region_long,
who_subregion, subregion_imputed,day = date_reported, everything())
return(x)
}
<- function(path) {
clean_iccpr_action <- read_excel(path) %>%
x ::clean_names() %>%
janitorrename(prior_iccpr_other_action = other_prior_iccpr_post_commitment_treaty_actions,
country_name = country) %>%
mutate(across(starts_with("prior_"), ~case_when(
== 1 ~ TRUE,
. is.na(.) ~ FALSE
)))
return(x)
}
<- function(path) {
clean_oxford <- tibble(
x # Get a list of all the sheets in the Excel file
index_name = excel_sheets(path)
%>%
) # Read each sheet
mutate(data = map(index_name, ~read_excel(path, sheet = .x))) %>%
# Standardize the index name based on the sheet name
mutate(index_name = janitor::make_clean_names(index_name)) %>%
# Make each data frame cell in the list column long and a little cleaner
mutate(clean = map(data, ~{
%>%
.x pivot_longer(cols = -c(country_code, country_name),
names_to = "day", values_to = "value") %>%
mutate(day = dmy(day))
%>%
})) # Get rid of the original wide data frame and unnest the long clean data
select(-data) %>%
unnest(clean) %>%
# Make data wide so that there's a column for each index and row for each
# country-day
pivot_wider(names_from = "index_name", values_from = "value") %>%
# Country names and codes fun times
mutate(iso3 = recode(country_code, "RKS" = "XKX")) %>%
mutate(country_name = countrycode(
origin = "iso3c", destination = "country.name",
iso3, custom_match = c("XKX" = "Kosovo", "TUR" = "Türkiye")
%>%
)) # Get rid of countries with all missing data
group_by(country_name) %>%
filter(!all(is.na(stringency_index))) %>%
ungroup() %>%
# Shorten these long column names
rename(
c3_cancel_events = c3_cancel_public_events,
c4_gatherings = c4_restrictions_on_gaterings,
c5_public_transport = c5_close_public_transport,
c6_stay_at_home = c6_stay_at_home_requirements,
c7_internal_movement = c7_movement_restrictions_intern,
c8_intl_travel = c8_international_travel_control
%>%
) # Make binary versions of these columns
mutate(across(c(c3_cancel_events, c4_gatherings,
c5_public_transport, c6_stay_at_home,
c7_internal_movement, c8_intl_travel,
e1_income_support, e2_debt_relief),list(bin = ~ ifelse(. > 0, 1, 0)))) %>%
# Create indicators for whether policies were added, removed, or never changed
group_by(country_name) %>%
mutate(across(c(c3_cancel_events, c4_gatherings,
c5_public_transport, c6_stay_at_home,
c7_internal_movement, c8_intl_travel,
e1_income_support, e2_debt_relief),list(added = ~any(c(NA, diff(.)) >= 1, na.rm = TRUE),
removed = ~any(c(NA, diff(.)) <= -1, na.rm = TRUE),
never = ~all(c(NA, diff(.)) == 0, na.rm = TRUE)))) %>%
ungroup() %>%
# Final column order
select(-country_code) %>%
select(country_name, iso3, day, everything())
return(x)
}
<- function(path) {
clean_pandem <- read_excel(path)
pandem_raw
<- c("None" = "0", "Minor" = "1", "Moderate" = "2", "Major" = "3")
pandem_levels
<- pandem_raw %>%
pandem_clean mutate(year_quarter = paste0(year, "-", quarter)) %>%
mutate(iso3 = countrycode(country_name,
origin = "country.name",
destination = "iso3c"),
country_name = countrycode(iso3, origin = "iso3c",
destination = "country.name",
custom_match = c("TUR" = "Türkiye"))) %>%
select(country_name, iso3, year, year_quarter,
pandem, panback, pandem_discrim = type1,
pandem_ndrights = type2,
pandem_abusive = type3,
pandem_nolimit = type4,
pandem_media = type7) %>%
# Make these 0-3 columns factors
mutate(across(starts_with("pandem_"), ~factor(., levels = pandem_levels, ordered = TRUE))) %>%
# Add labels
mutate(across(starts_with("pandem_"), ~fct_recode(., !!!pandem_levels))) %>%
# Drop unused levels
mutate(across(starts_with("pandem_"), ~fct_drop(.)))
return(pandem_clean)
}
<- function(path) {
clean_vdem <- read_rds(path) %>% as_tibble()
vdem_raw
<- vdem_raw %>%
vdem_clean filter(year >= 2020) %>%
mutate(country_name = countrycode(
origin = "iso3c", destination = "country.name",
country_text_id, custom_match = c("XKX" = "Kosovo", "ZZB" = "Zanzibar",
"PSG" = "Palestine (Gaza)", "SML" = "Somaliland",
"TUR" = "Türkiye")
%>%
)) select(country_name, iso3 = country_text_id, year,
# Civil society stuff
# CSO repression
v2csreprss, # Core civil society index (entry/exit, repression, participatory env)
v2xcs_ccsi,
# Human rights and politics
# Political corruption index (less to more, 0-1) (public sector +
# executive + legislative + judicial corruption)
v2x_corr,# Rule of law index
v2x_rule,
# Rights indexes
# Civil liberties index
v2x_civlib, # Physical violence index
v2x_clphy, # Private civil liberties index
v2x_clpriv, # Political civil liberties index
v2x_clpol,
# Democracy
v2x_polyarchy, v2x_libdem, v2x_regime_amb
)
return(vdem_clean)
}
<- function(iccpr_who, oxford, pandem, vdem) {
create_daily_skeleton <- list(unique(iccpr_who$iso3),
all_countries unique(oxford$iso3),
unique(pandem$iso3),
unique(vdem$iso3))
<- reduce(all_countries, intersect)
countries_in_all_data
# first_day <- min(oxford$day)
<- ymd("2020-03-11")
first_day <- max(oxford$day)
last_day
<- expand_grid(
daily_skeleton iso3 = countries_in_all_data,
day = seq(first_day, last_day, by = "1 day")
%>%
) mutate(year = year(day),
year_quarter = quarter(day, type = "year.quarter"),
day_num = as.numeric(day) - as.numeric(ymd("2020-03-10")),
year_week = calendar_narrow(as_iso_year_week_day(day), "week"),
year_week_day = as_year_month_day(calendar_narrow(set_day(year_week, 1), "day")),
year_quarter = calendar_narrow(as_year_quarter_day(day), "quarter"),
year_quarter_day = as_year_month_day(calendar_narrow(set_day(year_quarter, 1), "day"))) %>%
# Force the year_week column to be text since it's a weird {clock} class
mutate(year_week = as.character(year_week),
year_week_day = as.character(year_week_day),
year_quarter = as.character(year_quarter),
year_quarter_day = as.character(year_quarter_day)) %>%
# Pandem starts Q2 2020 on March 11 instead of April 1
mutate(year_quarter = ifelse(year_quarter == "2020-Q1", "2020-Q2", year_quarter)) %>%
mutate(country_name = countrycode(
origin = "iso3c", destination = "country.name",
iso3, custom_match = c("XKX" = "Kosovo", "TUR" = "Türkiye")
%>%
)) select(country_name, iso3, day, day_num, year,
year_quarter, year_quarter_day, year_week, year_week_day)
return(daily_skeleton)
}
<- function(skeleton, iccpr_who, iccpr_action, oxford, pandem, vdem) {
make_final_data <- skeleton %>%
daily_final left_join(select(iccpr_who, -country_name), by = c("iso3", "day")) %>%
left_join(select(iccpr_action, -country_name), by = c("iso3")) %>%
left_join(select(oxford, -country_name), by = c("iso3", "day")) %>%
left_join(select(pandem, -c(country_name, year)), by = c("iso3", "year_quarter")) %>%
left_join(select(vdem, -country_name), by = c("iso3", "year"))
return(daily_final)
}
<- function(daily_final) {
make_weekly_data <- daily_final %>%
weekly_final group_by(year_week, year_week_day, country_name, iso3, who_region, who_region_long,
%>%
who_subregion, prior_iccpr_derogations, prior_iccpr_other_action) summarize(across(c(new_cases, new_deaths), ~sum(., na.rm = TRUE)),
across(c(iccpr_derogation_filed, derogation_start,
~max(., na.rm = TRUE)),
derogation_ineffect, derogation_end), across(matches("[ce]\\d_"), ~max(., na.rm = TRUE)),
across(c(pandem, panback, starts_with("pandem_"), starts_with("v2")), ~max(., na.rm = TRUE))) %>%
group_by(country_name) %>%
mutate(cumulative_cases = cumsum(new_cases),
cumulative_deaths = cumsum(new_deaths)) %>%
mutate(year_week_num = 1:n(), .after = "year_week") %>%
ungroup() %>%
arrange(country_name)
return(weekly_final)
}
<- function(daily_final) {
make_quarterly_data <- daily_final %>%
quarterly_final group_by(year_quarter, year_quarter_day, country_name, iso3, who_region, who_region_long,
%>%
who_subregion, prior_iccpr_derogations, prior_iccpr_other_action) summarize(across(c(new_cases, new_deaths), ~sum(., na.rm = TRUE)),
across(c(iccpr_derogation_filed, derogation_start,
~max(., na.rm = TRUE)),
derogation_ineffect, derogation_end), across(matches("[ce]\\d_"), ~max(., na.rm = TRUE)),
across(c(pandem, panback, starts_with("pandem_"), starts_with("v2")), ~max(., na.rm = TRUE))) %>%
group_by(country_name) %>%
mutate(cumulative_cases = cumsum(new_cases),
cumulative_deaths = cumsum(new_deaths)) %>%
mutate(year_quarter_num = 1:n(), .after = "year_quarter") %>%
ungroup() %>%
arrange(country_name)
return(quarterly_final)
}
<- function(weekly_final) {
make_year_week_lookup <- weekly_final %>%
year_week_lookup distinct(year_week, year_week_num, year_week_day) %>%
mutate(year_week_day = ymd(year_week_day))
return(year_week_lookup)
}
<- function(quarterly_final) {
make_year_quarter_lookup <- quarterly_final %>%
year_quarter_lookup distinct(year_quarter, year_quarter_num, year_quarter_day) %>%
mutate(year_quarter_day = ymd(year_quarter_day))
return(year_quarter_lookup)
}
<- function(path) {
load_world_map <- read_sf(path) %>%
world_map filter(ISO_A3 != "ATA")
return(world_map)
}
<- function(data) {
make_derogation_count <- data %>%
new_derogations group_by(iso3) %>%
summarize(derogations = sum(iccpr_derogation_filed))
return(new_derogations)
}
<- function(derogation_count, map) {
make_derogation_map_data <- map %>%
map_with_derogations # Fix some Natural Earth ISO weirdness
mutate(ISO_A3 = ifelse(ISO_A3 == "-99", as.character(ISO_A3_EH), as.character(ISO_A3))) %>%
mutate(ISO_A3 = case_when(
$ISO_A3 == "GRL" ~ "DNK",
.$NAME == "Norway" ~ "NOR",
.$NAME == "Kosovo" ~ "XKK",
.TRUE ~ ISO_A3
%>%
)) left_join(derogation_count, by = join_by(ISO_A3 == iso3)) %>%
mutate(derogations_1plus = ifelse(derogations == 0, NA, derogations))
return(map_with_derogations)
}
# When using a file-based target, {targets} requires that the function that
# saves the file returns a path to the file. write_csv() and write_dta() both
# invisibly return the data frame being written, and saveRDS() returns NULL, so
# we need some wrapper functions to save the files and return the paths.
<- function(df, path) {
save_csv ::write_csv(df, path)
readrreturn(path)
}
<- function(df, path) {
save_r saveRDS(df, path)
return(path)
}
<- function(df, path) {
save_dta ::write_dta(df, path)
havenreturn(path)
}
R/funs_graphics.R
Code
<- MetBrewer::met.brewer("Tiepolo")
clrs
<- function() {
set_annotation_fonts ::update_geom_defaults("label", list(family = "Noto Sans", face = "plain"))
ggplot2::update_geom_defaults("text", list(family = "Noto Sans", face = "plain"))
ggplot2
}
<- function(base_size = 11, base_family = "Noto Sans", prior = FALSE) {
theme_pandem <- theme_bw(base_size, base_family) +
ret theme(panel.background = element_rect(fill = "#ffffff", colour = NA),
title = element_text(size = rel(1), family = "Noto Sans", face = "bold"),
plot.subtitle = element_text(size = rel(0.8),
family = "Noto Sans", face = "plain"),
plot.caption = element_text(margin = margin(t = 10), size = rel(0.6),
family = "Noto Sans", face = "plain"),
panel.border = element_rect(color = "grey50", fill = NA, linewidth = 0.15),
panel.spacing = unit(1, "lines"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(linewidth = 0.25, colour = "grey90"),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.title = element_text(size = rel(0.8),
family = "Noto Sans", face = "bold"),
axis.title.x = element_text(hjust = 0, margin = margin(t = 10)),
axis.title.y = element_text(hjust = 1, margin = margin(r = 10)),
legend.position = "bottom",
legend.title = element_text(size = rel(0.7), vjust = 0.5,
family = "Noto Sans", face = "plain"),
legend.key.size = unit(0.7, "line"),
legend.key = element_blank(),
legend.spacing = unit(0.1, "lines"),
legend.justification = "left",
legend.margin = margin(t = -5, b = 0, l = 0, r = 0),
strip.text = element_text(size = rel(0.9), hjust = 0,
family = "Noto Sans", face = "bold"),
strip.background = element_rect(fill = "white", colour = NA))
if (prior) {
<- ret +
ret theme(panel.grid.major = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
panel.border = element_blank())
else {
}
ret
} }
R/funs_models.R
Code
# Running modelsummary() on Bayesian models takes a while because of all the
# calculations involved in creating the GOF statistics. With modelsummary 0.7+,
# though it's now possible to build the base model with modelsummary(..., output
# = "modelsummary_list"), save that as an intermediate object, and then feed it
# through modelsummary() again with whatever other output you want. The
# modelsummary_list-based object thus acts like an output-agnostic ur-model.
<- function(model_df) {
build_modelsummary <- model_df %>%
msl pull(model, name = nice) %>%
::modelsummary(output = "modelsummary_list",
modelsummarystatistic = "[{conf.low}, {conf.high}]",
ci_method = "hdi",
metrics = c("R2"))
return(msl)
}
# This is how to get other stats like ELPD (using metrics = "LOOIC" is required
# for that; metrics = "R2" provides nobs). I would do this for all the summary
# tables, but for whatever reason, it takes forever to calculate ELPD/LOO stuff
# on ordered logit models, so we just show N instead
#
# gm <- tribble(
# ~raw, ~clean, ~fmt, ~omit,
# "nobs", "N", 0, FALSE,
# "r.squared", "R2", 3, FALSE,
# "elpd", "ELPD", 1, FALSE,
# "elpd.se", "ELPD (SE)", 1, FALSE
# )
#
# modelsummary(models_policies,
# estimate = "{estimate}",
# statistic = "conf.int",
# gof_map = gm,
# metrics = c("R2", "LOOIC"))
<- tribble(
gof_map ~raw, ~clean, ~fmt, ~omit,
"nobs", "N", 0, FALSE,
"r.squared", "\\(R^2\\) (total)", 2, FALSE,
"r2.marginal", "\\(R^2\\) (marginal)", 2, FALSE
)
<- c(
coef_map "b_derogation_ineffect" = "Derogation in effect",
"b_new_cases_z" = "New cases (standardized)",
"b_cumulative_cases_z" = "Cumulative cases (standardized)",
"b_new_deaths_z" = "New deaths (standardized)",
"b_cumulative_deaths_z" = "Cumulative deaths (standardized)",
"b_prior_iccpr_derogationsTRUE" = "Past ICCPR derogation",
"b_prior_iccpr_other_actionTRUE" = "Past ICCPR action",
"b_v2x_rule" = "Rule of law",
"b_v2x_civlib" = "Civil liberties",
"b_v2xcs_ccsi" = "Core civil society index",
"b_Intercept" = "Constant",
"b_Intercept[1]" = "Cut 1",
"b_Intercept[2]" = "Cut 2",
"b_Intercept[3]" = "Cut 3",
"b_year_week_num" = "Year-week",
"sd_country_name__Intercept" = "Country random effects σ",
"sd_who_region__Intercept" = "Region random effects σ"
)
# Model definitions
<- function(panel) {
f_policies <- 1757 # From random.org
BAYES_SEED
<- panel %>%
panel mutate(across(c(new_cases, new_deaths, cumulative_cases, cumulative_deaths),
list(z = ~as.numeric(scale(.)))))
# Use .data[[blah]] for tidy evaluation with strings
# https://www.tidyverse.org/blog/2019/06/rlang-0-4-0/#other-simple-tidy-evaluation-patterns
<- function(x) {
never_filter %>%
panel filter(.data[[x]] == 0)
}
<- c(prior(student_t(1, 0, 3), class = Intercept),
logit_priors prior(student_t(1, 0, 3), class = b),
prior(cauchy(0, 1), class = sd, lb = 0))
<- function(y, data) {
policies_model <- glue::glue(y, " ~ derogation_ineffect + new_cases_z + cumulative_cases_z + ",
form "new_deaths_z + cumulative_deaths_z + ",
"prior_iccpr_derogations + prior_iccpr_other_action + ",
"v2x_rule + v2x_civlib + v2xcs_ccsi + ",
"year_week_num + (1 | country_name)") %>%
as.formula()
# Use rlang::inject() to evaluate the formula object before running the model
# so that the formula shows up correctly in summary().
# See https://community.rstudio.com/t/tidy-evaluation-and-formulae/4561/17
::inject(
rlangbrm(
bf(!!form),
data = data,
family = bernoulli(),
prior = logit_priors,
chains = 4, seed = BAYES_SEED,
threads = threading(2) # Two CPUs per chain to speed things up
)
)
}
<- tribble(
policies_models ~nice, ~y, ~never,
"Cancel Public Events", "c3_cancel_events_bin", "c3_cancel_events_never",
"Gathering Restrictions", "c4_gatherings_bin", "c4_gatherings_never",
"Close Public Transit", "c5_public_transport_bin", "c5_public_transport_never",
"Movement", "c7_internal_movement_bin", "c7_internal_movement_never",
"International Travel", "c8_intl_travel_bin", "c8_intl_travel_never"
%>%
) mutate(data = map(never, ~never_filter(.))) %>%
mutate(model = map2(y, data, ~policies_model(.x, .y)))
return(policies_models)
}
<- function(panel) {
f_human_rights <- 6440 # From random.org
BAYES_SEED
<- panel %>%
panel mutate(across(c(new_cases, new_deaths, cumulative_cases, cumulative_deaths),
list(z = ~as.numeric(scale(.)))))
<- c(prior(student_t(1, 0, 3), class = Intercept),
logit_priors prior(student_t(1, 0, 3), class = b),
prior(cauchy(0, 1), class = sd, lb = 0))
<- c(prior(student_t(1, 0, 3), class = Intercept),
ologit_priors prior(student_t(1, 0, 3), class = b),
prior(cauchy(0, 1), class = sd, lb = 0))
# It would be neat to include country random effects but there's not enough
# variation within lots of the countries to pick up any of the effect. When
# (1 | country_name) is included, the model predicts a 100% chance of
# pandem_discrim == 1 and a 0% chance of pandem_discrim == 0, which is
# annoying (and it takes 45 minutes to run, ugh)
#
# So instead we use region random effects
<- function(y, family, prior) {
human_rights_model <- glue::glue(y, " ~ derogation_ineffect + new_cases_z + cumulative_cases_z + ",
form "new_deaths_z + cumulative_deaths_z + ",
"prior_iccpr_derogations + prior_iccpr_other_action + ",
"v2x_rule + v2x_civlib + v2xcs_ccsi + ",
"year_quarter_num + (1 | who_region)") %>%
as.formula()
# Use rlang::inject() to evaluate the formula object before running the model
# so that the formula shows up correctly in summary().
# See https://community.rstudio.com/t/tidy-evaluation-and-formulae/4561/17
::inject(
rlangbrm(
bf(!!form),
data = panel,
family = family,
prior = prior,
chains = 4, seed = BAYES_SEED,
threads = threading(2) # Two CPUs per chain to speed things up
)
)
}
<- tribble(
human_rights_models ~nice, ~y, ~family, ~prior,
"Discriminatory Policy", "pandem_discrim", "cumulative", ologit_priors,
"Non-Derogable Rights", "pandem_ndrights", "bernoulli", logit_priors,
"No Time Limit Measures", "pandem_nolimit", "cumulative", ologit_priors,
"Abusive Enforcement", "pandem_abusive", "cumulative", ologit_priors
%>%
) # Neat pattern for using named list elements in pmap instead of ..1, ..2, etc.
# https://stackoverflow.com/a/66147672/120898
# But we can't use that here because of ... issues nested in a function like
# this. And there are similar issues when using ..1, ..2, etc. when using
# the formula syntax like ~human_rights_model(..1, ..2, ..3). Everything works fine
# without the anonymous lambda ~ syntax though
mutate(model = pmap(lst(y, family, prior), human_rights_model))
return(human_rights_models)
}
R/funs_plots.R
Code
# Diagnostic plots
<- function(model, params) {
plot_trace %>%
model ::gather_draws(!!!syms(params)) %>%
tidybayesggplot(aes(x = .iteration, y = .value, color = factor(.chain))) +
geom_line(linewidth = 0.1) +
scale_color_viridis_d(option = "rocket", end = 0.85) +
labs(color = "Chain") +
facet_wrap(vars(.variable), scales = "free_y") +
theme_pandem()
}
<- function(model, params) {
plot_trank %>%
model ::gather_draws(!!!syms(params)) %>%
tidybayesgroup_by(.variable) %>%
mutate(draw_rank = rank(.value)) %>%
ggplot(aes(x = draw_rank, color = factor(.chain))) +
stat_bin(geom = "step", binwidth = 200, position = position_identity(), boundary = 0) +
scale_color_viridis_d(option = "rocket", end = 0.85) +
labs(color = "Chain") +
facet_wrap(vars(.variable), scales = "free_y") +
theme_pandem()
}
<- function(model) {
plot_pp ::pp_check(model, ndraws = 100, type = "bars") +
bayesplottheme_pandem()
}
<- function(x, direction) {
fmt_p_inline <- round(x, 2)
x
if (direction == "gt") {
<- glue::glue(r"[$p(\Delta > 0) = {x}$]")
out else {
} <- glue::glue(r"[$p(\Delta < 0) = {x}$]")
out
}
return(out)
}
# Storing ggplot objects as rds files is BAD
# (https://github.com/hadley/ggplot2-book/issues/344)
#
# But it's also necessary/recommended when working with {targets}---the basic
# walkthrough in the manual even shows how to create a plot as a target
#
# ggplot objects are strange beasts because they store a copy of the overall
# environment when serialized into rds files. When working with regular-sized
# datasets, this isn't really ever a problem. But when working with tidy
# data frames of MCMC chains with millions and millions of rows, this can create
# rds files that are hundreds or thousands of MBs, which is wild.
#
# This post goes into more details about how to fix it:
# https://ropensci.org/blog/2022/12/06/save-ggplot2-targets/
#
# Instead of using stat_lineribbon() on tidy MCMC draws like I normally do, it's
# better to collapse the data down first with median_qi() and then use
# geom_lineribbon(). This results in much tinier data frames and plots render
# immediately in .qmd files
library(marginaleffects)
<- function(model_df, year_week_lookup) {
build_policies_plot_data <- model_df %>%
model_df_preds_mfx mutate(pred_data = map(model, ~{
datagrid(model = .x,
year_week_num = 1:69,
derogation_ineffect = 0:1) %>%
::add_epred_draws(.x) %>%
tidybayesleft_join(year_week_lookup, by = "year_week_num") %>%
mutate(derogation_ineffect = factor(derogation_ineffect,
levels = 0:1,
labels = c("No", "Yes"),
ordered = TRUE)) %>%
group_by(year_week_day, derogation_ineffect) %>%
::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
tidybayes%>%
})) mutate(mfx_data = map(model, ~{
%>%
.x comparisons(newdata = datagrid(year_week_num = 1:69),
variables = "derogation_ineffect",
type = "response") %>%
posterior_draws() %>%
left_join(year_week_lookup, by = "year_week_num") %>%
group_by(year_week_day) %>%
reframe(post_medians = tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95)),
p_gt_0 = sum(draw > 0) / n()) %>%
unnest(post_medians) %>%
rename(draw = y, .lower = ymin, .upper = ymax)
}))
return(model_df_preds_mfx)
}
<- function(model_df, year_quarter_lookup) {
build_human_rights_plot_data <- model_df %>%
model_df_preds_mfx mutate(pred_data = map2(model, family, ~{
if (.y == "cumulative") {
<- datagrid(model = .x,
df year_quarter_num = 1:6,
derogation_ineffect = 0:1) %>%
::add_epred_draws(.x) %>%
tidybayesleft_join(year_quarter_lookup, by = "year_quarter_num") %>%
mutate(derogation_ineffect = factor(derogation_ineffect,
levels = 0:1,
labels = c("No derogation", "Derogation in effect"),
ordered = TRUE)) %>%
group_by(year_quarter_day, year_quarter, .category, derogation_ineffect) %>%
::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
tidybayeselse {
} <- datagrid(model = .x,
df year_quarter_num = 1:6,
derogation_ineffect = 0:1) %>%
::add_epred_draws(.x) %>%
tidybayesleft_join(year_quarter_lookup, by = "year_quarter_num") %>%
mutate(derogation_ineffect = factor(derogation_ineffect,
levels = 0:1,
labels = c("No", "Yes"),
ordered = TRUE)) %>%
group_by(year_quarter_day, year_quarter, derogation_ineffect) %>%
::median_qi(.epred, .width = c(0.5, 0.8, 0.95))
tidybayes
}%>%
})) mutate(mfx_data = pmap(list(model, family, y), \(.model, .family, .y) {
<- .model %>%
mfx comparisons(newdata = datagrid(year_quarter_num = 1:6),
variables = "derogation_ineffect",
type = "response") %>%
posterior_draws() %>%
left_join(year_quarter_lookup, by = "year_quarter_num")
if (.family == "cumulative") {
<- mfx %>%
mfx mutate(group = factor(group, levels = levels(.model$data[[.y]]), ordered = TRUE)) %>%
group_by(year_quarter_day, year_quarter, group) %>%
reframe(post_medians = tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95)),
p_gt_0 = sum(draw > 0) / n()) %>%
unnest(post_medians) %>%
rename(draw = y, .lower = ymin, .upper = ymax)
else {
} <- mfx %>%
mfx group_by(year_quarter_day, year_quarter) %>%
reframe(post_medians = tidybayes::median_qi(draw, .width = c(0.5, 0.8, 0.95)),
p_gt_0 = sum(draw > 0) / n()) %>%
unnest(post_medians) %>%
rename(draw = y, .lower = ymin, .upper = ymax) %>%
mutate(group = "Logit")
}
mfx
}))
return(model_df_preds_mfx)
}
<- function(model_df) {
build_policies_table_data <- model_df %>%
policies_pred_tbl unnest(pred_data) %>%
filter(.width == 0.95) %>%
select(nice, year_week_day, derogation_ineffect, .epred, .lower, .upper) %>%
group_by(nice, derogation_ineffect) %>%
mutate(rank = dense_rank(.epred)) %>%
filter(rank == 1 | rank == max(rank)) %>%
mutate(rank = ifelse(rank == 1, "min", "max")) %>%
group_by(rank, derogation_ineffect) %>%
mutate(outcome = janitor::make_clean_names(nice)) %>%
rename(draw = .epred, min = .lower, max = .upper) %>%
mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0)))) %>%
ungroup()
<- model_df %>%
policies_mfx_tbl unnest(mfx_data) %>%
filter(.width == 0.95) %>%
select(nice, year_week_day, draw, .lower, .upper, p_gt_0, .width) %>%
group_by(nice) %>%
mutate(rank = dense_rank(draw)) %>%
filter(rank == 1 | rank == max(rank)) %>%
mutate(rank = ifelse(rank == 1, "min", "max")) %>%
group_by(rank) %>%
mutate(outcome = janitor::make_clean_names(nice)) %>%
rename(min = .lower, max = .upper) %>%
mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0))),
p_lt_0 = p_gt_0 - 1) %>%
mutate(p_gt = fmt_p_inline(p_gt_0, "gt"),
p_lt = fmt_p_inline(p_lt_0, "lt")) %>%
ungroup()
return(lst(policies_pred_tbl, policies_mfx_tbl))
}
<- function(model_df) {
build_hr_table_data <- model_df %>%
hr_pred_tbl mutate(pred_data = map(pred_data, ~{
%>%
.x mutate(derogation_ineffect = as.character(derogation_ineffect),
derogation_ineffect = recode(derogation_ineffect,
"No derogation" = "No",
"Derogation in effect" = "Yes"))
%>%
})) unnest(pred_data) %>%
filter(.width == 0.95) %>%
select(nice, year_quarter_day, derogation_ineffect, .category, .epred, .lower, .upper) %>%
group_by(nice, derogation_ineffect, .category) %>%
mutate(rank = dense_rank(.epred)) %>%
filter(rank == 1 | rank == max(rank)) %>%
mutate(rank = ifelse(rank == 1, "min", "max")) %>%
group_by(rank, derogation_ineffect, .category) %>%
mutate(outcome = janitor::make_clean_names(nice)) %>%
rename(draw = .epred, min = .lower, max = .upper) %>%
mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0)))) %>%
ungroup() %>%
mutate(category = ifelse(is.na(.category), "NA", as.character(.category)))
<- model_df %>%
hr_mfx_tbl mutate(mfx_data = map(mfx_data, ~{
%>%
.x mutate(group = as.character(group))
%>%
})) unnest(mfx_data) %>%
filter(.width == 0.95) %>%
select(nice, year_quarter_day, group, draw, .lower, .upper, p_gt_0, .width) %>%
group_by(nice, group) %>%
mutate(rank = dense_rank(draw)) %>%
filter(rank == 1 | rank == max(rank)) %>%
mutate(rank = ifelse(rank == 1, "min", "max")) %>%
group_by(rank, group) %>%
mutate(outcome = janitor::make_clean_names(nice)) %>%
rename(min = .lower, max = .upper) %>%
mutate(across(c(draw, min, max), list(nice = ~round(. * 100, 0))),
p_lt_0 = 1 - p_gt_0) %>%
mutate(p_gt = fmt_p_inline(p_gt_0, "gt"),
p_lt = fmt_p_inline(p_lt_0, "lt")) %>%
ungroup()
return(lst(hr_pred_tbl, hr_mfx_tbl))
}