library(tidyverse)
library(targets)
library(broom)
library(broom.mixed)
library(tidybayes)
library(glue)
library(brms)
library(scales)
library(kableExtra)
library(modelsummary)
library(lubridate)
library(here)
# Generated via random.org
set.seed(9936)
# Load models
::with_dir(here::here(), {
withrsource(tar_read(plot_funs))
# Load big list of models
tar_load(model_df)
# Load actual model objects
# This works, but doesn't put the models in the dependency network for some reason
# tar_load(model_df$model)
# This also works but again doesn't register correctly as dependencies
# tar_load(starts_with("m_pts"))
# So we do it this manual way
tar_load(c(m_pts_baseline, m_pts_total, m_pts_total_new,
m_pts_advocacy, m_pts_entry, m_pts_funding,
m_pts_v2csreprss,
m_lhr_baseline, m_lhr_total, m_lhr_total_new,
m_lhr_advocacy, m_lhr_entry, m_lhr_funding,
m_lhr_v2csreprss,
m_pts_baseline_train, m_pts_total_train,
m_pts_advocacy_train, m_pts_entry_train, m_pts_funding_train,
m_pts_v2csreprss_train,
m_lhr_baseline_train, m_lhr_total_train, m_lhr_total_new_train,
m_lhr_advocacy_train, m_lhr_entry_train, m_lhr_funding_train,
m_lhr_v2csreprss_train)) })
<- "Personal"
computer
if (computer == "Work") {
<- "2019 MacBook Pro with 16 cores and 32 GB of RAM, using Stan through brms through cmdstanr"
computer_details else {
} <- "2021 MacBook Pro M1 Max with 10 cores and 32 GB of RAM, using Stan through brms through cmdstanr"
computer_details }
We ran these models on a 2021 MacBook Pro M1 Max with 10 cores and 32 GB of RAM, using Stan through brms through cmdstanr.
<- model_df %>%
models_e1 filter(!str_detect(model, "v2csreprss")) %>%
mutate(actual_model = model %>% map(~eval(rlang::sym(.)))) %>%
mutate(across(c(outcome_var, explan_var, family), ~fct_inorder(., ordered = TRUE)))
<- models_e1 %>%
model_time_e1 filter(training == "Full data") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
<- tibble(Outcome = "Total",
total_row_e1 `Total time (i.e. longest chain)` =
as.duration(sum(model_time_e1$`Total time (i.e. longest chain)`)))
<- model_time_e1 %>%
model_time_e1 bind_rows(total_row_e1)
%>%
model_time_e1 select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e1$Outcome))) %>%
kable_styling()
Model | Main predictor | Family | Total time (i.e. longest chain) |
---|---|---|---|
Political terror | |||
m_pts_baseline
|
Baseline | Ordered logit | 80.659s (~1.34 minutes) |
m_pts_total
|
Total legal barriers | Ordered logit | 81.274s (~1.35 minutes) |
m_pts_total_new
|
New legal barriers | Ordered logit | 72.786s (~1.21 minutes) |
m_pts_advocacy
|
Barriers to advocacy | Ordered logit | 80.014s (~1.33 minutes) |
m_pts_entry
|
Barriers to entry | Ordered logit | 76.606s (~1.28 minutes) |
m_pts_funding
|
Barriers to funding | Ordered logit | 77.1s (~1.29 minutes) |
Latent human rights | |||
m_lhr_baseline
|
Baseline | OLS | 13.227s |
m_lhr_total
|
Total legal barriers | OLS | 13.997s |
m_lhr_total_new
|
New legal barriers | OLS | 13.137s |
m_lhr_advocacy
|
Barriers to advocacy | OLS | 15.639s |
m_lhr_entry
|
Barriers to entry | OLS | 13.714s |
m_lhr_funding
|
Barriers to funding | OLS | 14.51s |
Total | |||
552.663s (~9.21 minutes) |
<- models_e1 %>%
model_time_e1 filter(training == "Training") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
<- tibble(Outcome = "Total",
total_row_e1 `Total time (i.e. longest chain)` =
as.duration(sum(model_time_e1$`Total time (i.e. longest chain)`)))
<- model_time_e1 %>%
model_time_e1 bind_rows(total_row_e1)
%>%
model_time_e1 select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e1$Outcome))) %>%
kable_styling()
Model | Main predictor | Family | Total time (i.e. longest chain) |
---|---|---|---|
Political terror | |||
m_pts_baseline_train
|
Baseline | Ordered logit | 69.16s (~1.15 minutes) |
m_pts_total_train
|
Total legal barriers | Ordered logit | 66.243s (~1.1 minutes) |
m_pts_advocacy_train
|
Barriers to advocacy | Ordered logit | 71.552s (~1.19 minutes) |
m_pts_entry_train
|
Barriers to entry | Ordered logit | 66.969s (~1.12 minutes) |
m_pts_funding_train
|
Barriers to funding | Ordered logit | 67.204s (~1.12 minutes) |
Latent human rights | |||
m_lhr_baseline_train
|
Baseline | OLS | 11.832s |
m_lhr_total_train
|
Total legal barriers | OLS | 12.77s |
m_lhr_total_new_train
|
New legal barriers | OLS | 12.13s |
m_lhr_advocacy_train
|
Barriers to advocacy | OLS | 15.035s |
m_lhr_entry_train
|
Barriers to entry | OLS | 14.105s |
m_lhr_funding_train
|
Barriers to funding | OLS | 13.531s |
Total | |||
420.531s (~7.01 minutes) |
<- model_df %>%
models_e2 filter(str_detect(model, "baseline") | str_detect(model, "v2csreprss")) %>%
mutate(actual_model = model %>% map(~eval(rlang::sym(.)))) %>%
mutate(across(c(outcome_var, explan_var, family), ~fct_inorder(., ordered = TRUE)))
<- models_e2 %>%
model_time_e2 filter(training == "Full data") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
<- tibble(Outcome = "Total",
total_row_e2 `Total time (i.e. longest chain)` =
as.duration(sum(model_time_e2$`Total time (i.e. longest chain)`)))
<- model_time_e2 %>%
model_time_e2 bind_rows(total_row_e2)
%>%
model_time_e2 select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e2$Outcome))) %>%
kable_styling()
Model | Main predictor | Family | Total time (i.e. longest chain) |
---|---|---|---|
Political terror | |||
m_pts_baseline
|
Baseline | Ordered logit | 80.659s (~1.34 minutes) |
m_pts_v2csreprss
|
Civil society repression | Ordered logit | 85.602s (~1.43 minutes) |
Latent human rights | |||
m_lhr_baseline
|
Baseline | OLS | 13.227s |
m_lhr_v2csreprss
|
Civil society repression | OLS | 14.233s |
Total | |||
193.721s (~3.23 minutes) |
<- models_e2 %>%
model_time_e2 filter(training == "Training") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
<- tibble(Outcome = "Total",
total_row_e2 `Total time (i.e. longest chain)` =
as.duration(sum(model_time_e2$`Total time (i.e. longest chain)`)))
<- model_time_e2 %>%
model_time_e2 bind_rows(total_row_e2)
%>%
model_time_e2 select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e2$Outcome))) %>%
kable_styling()
Model | Main predictor | Family | Total time (i.e. longest chain) |
---|---|---|---|
Political terror | |||
m_pts_baseline_train
|
Baseline | Ordered logit | 69.16s (~1.15 minutes) |
m_pts_v2csreprss_train
|
Civil society repression | Ordered logit | 74.994s (~1.25 minutes) |
Latent human rights | |||
m_lhr_baseline_train
|
Baseline | OLS | 11.832s |
m_lhr_v2csreprss_train
|
Civil society repression | OLS | 16.914s |
Total | |||
172.9s (~2.88 minutes) |
All the models are run with a targets
pipeline with dataset-specific functions that live in these files:
R/models_pts.R
(for PTS-based models)R/models_lhr.R
(for latent human rights-based models)To keep the analysis relatively self contained here in the analysis notebook, and to make it so there’s no need to hunt through the GitHub repository to find the actual code, here’s that code:
R/models_pts.R
# Settings ----------------------------------------------------------------
# Run this inside each model function instead of outside so that future workers
# use these options internally
<- function() {
pts_setup options(worker_options)
# Settings
<- 4
CHAINS <- 2000
ITER <- 1000
WARMUP <- 2009 # From random.org
BAYES_SEED <- getOption("mc.threads")
threads
# Priors
<- c(set_prior("normal(0, 3)", class = "Intercept"),
priors_vague set_prior("normal(0, 3)", class = "b"),
set_prior("cauchy(0, 1)", class = "sd"))
return(list(chains = CHAINS, iter = ITER, warmup = WARMUP, seed = BAYES_SEED,
threads = threads, priors_vague = priors_vague))
}
# Regular models ----------------------------------------------------------
<- function(dat) {
f_pts_baseline <- pts_setup()
pts_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(PTS_factor_lead1 ~ PTS_factor +
+
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
<- function(dat) {
f_pts_total <- pts_setup()
pts_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(PTS_factor_lead1 ~ barriers_total + barriers_total_lag1 +
+
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
<- function(dat) {
f_pts_total_new <- pts_setup()
pts_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(PTS_factor_lead1 ~ barriers_total_new + barriers_total_new_lag1 +
+
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
<- function(dat) {
f_pts_advocacy <- pts_setup()
pts_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(PTS_factor_lead1 ~ advocacy + advocacy_lag1 +
+
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
<- function(dat) {
f_pts_entry <- pts_setup()
pts_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(PTS_factor_lead1 ~ entry + entry_lag1 +
+
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
<- function(dat) {
f_pts_funding <- pts_setup()
pts_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(PTS_factor_lead1 ~ funding + funding_lag1 +
+
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
<- function(dat) {
f_pts_v2csreprss <- pts_setup()
pts_settings
<- brm(
model bf(PTS_factor_lead1 ~ v2csreprss + v2csreprss_lag1 +
+
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
R/models_lhr.R
# Settings ----------------------------------------------------------------
<- function() {
lhr_setup options(worker_options)
# Settings
<- 4
CHAINS <- 2000
ITER <- 1000
WARMUP <- 4045 # From random.org
BAYES_SEED <- getOption("mc.threads")
threads
# Priors
<- c(set_prior("normal(0, 10)", class = "Intercept"),
priors_vague set_prior("normal(0, 3)", class = "b"),
set_prior("cauchy(0, 1)", class = "sd"))
return(list(chains = CHAINS, iter = ITER, warmup = WARMUP, seed = BAYES_SEED,
threads = threads, priors_vague = priors_vague))
}
# Regular models ----------------------------------------------------------
<- function(dat) {
f_lhr_baseline <- lhr_setup()
lhr_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(latent_hr_mean_lead1 ~ latent_hr_mean +
+
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
<- function(dat) {
f_lhr_total <- lhr_setup()
lhr_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(latent_hr_mean_lead1 ~ barriers_total + barriers_total_lag1 +
+
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
<- function(dat) {
f_lhr_total_new <- lhr_setup()
lhr_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(latent_hr_mean_lead1 ~ barriers_total_new + barriers_total_new_lag1 +
+
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
<- function(dat) {
f_lhr_advocacy <- lhr_setup()
lhr_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(latent_hr_mean_lead1 ~ advocacy + advocacy_lag1 +
+
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
<- function(dat) {
f_lhr_entry <- lhr_setup()
lhr_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(latent_hr_mean_lead1 ~ entry + entry_lag1 +
+
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
<- function(dat) {
f_lhr_funding <- lhr_setup()
lhr_settings
<- dat %>% filter(laws)
dat
<- brm(
model bf(latent_hr_mean_lead1 ~ funding + funding_lag1 +
+
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
<- function(dat) {
f_lhr_v2csreprss <- lhr_setup()
lhr_settings
<- brm(
model bf(latent_hr_mean_lead1 ~ v2csreprss + v2csreprss_lag1 +
+
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict 1 | gwcode)
(
),family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}