library(tidyverse)
library(targets)
library(broom)
library(broom.mixed)
library(tidybayes)
library(glue)
library(brms)
library(scales)
library(kableExtra)
library(modelsummary)
library(lubridate)
library(here)
# Generated via random.org
set.seed(9936)
# Load models
withr::with_dir(here::here(), {
source(tar_read(plot_funs))
# Load big list of models
tar_load(model_df)
# Load actual model objects
# This works, but doesn't put the models in the dependency network for some reason
# tar_load(model_df$model)
# This also works but again doesn't register correctly as dependencies
# tar_load(starts_with("m_pts"))
# So we do it this manual way
tar_load(c(m_pts_baseline, m_pts_total, m_pts_total_new,
m_pts_advocacy, m_pts_entry, m_pts_funding,
m_pts_v2csreprss,
m_lhr_baseline, m_lhr_total, m_lhr_total_new,
m_lhr_advocacy, m_lhr_entry, m_lhr_funding,
m_lhr_v2csreprss,
m_pts_baseline_train, m_pts_total_train,
m_pts_advocacy_train, m_pts_entry_train, m_pts_funding_train,
m_pts_v2csreprss_train,
m_lhr_baseline_train, m_lhr_total_train, m_lhr_total_new_train,
m_lhr_advocacy_train, m_lhr_entry_train, m_lhr_funding_train,
m_lhr_v2csreprss_train))
})computer <- "Personal"
if (computer == "Work") {
computer_details <- "2019 MacBook Pro with 16 cores and 32 GB of RAM, using Stan through brms through cmdstanr"
} else {
computer_details <- "2021 MacBook Pro M1 Max with 10 cores and 32 GB of RAM, using Stan through brms through cmdstanr"
}We ran these models on a 2021 MacBook Pro M1 Max with 10 cores and 32 GB of RAM, using Stan through brms through cmdstanr.
models_e1 <- model_df %>%
filter(!str_detect(model, "v2csreprss")) %>%
mutate(actual_model = model %>% map(~eval(rlang::sym(.)))) %>%
mutate(across(c(outcome_var, explan_var, family), ~fct_inorder(., ordered = TRUE)))
model_time_e1 <- models_e1 %>%
filter(training == "Full data") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
total_row_e1 <- tibble(Outcome = "Total",
`Total time (i.e. longest chain)` =
as.duration(sum(model_time_e1$`Total time (i.e. longest chain)`)))
model_time_e1 <- model_time_e1 %>%
bind_rows(total_row_e1)
model_time_e1 %>%
select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e1$Outcome))) %>%
kable_styling()| Model | Main predictor | Family | Total time (i.e. longest chain) |
|---|---|---|---|
| Political terror | |||
m_pts_baseline
|
Baseline | Ordered logit | 80.659s (~1.34 minutes) |
m_pts_total
|
Total legal barriers | Ordered logit | 81.274s (~1.35 minutes) |
m_pts_total_new
|
New legal barriers | Ordered logit | 72.786s (~1.21 minutes) |
m_pts_advocacy
|
Barriers to advocacy | Ordered logit | 80.014s (~1.33 minutes) |
m_pts_entry
|
Barriers to entry | Ordered logit | 76.606s (~1.28 minutes) |
m_pts_funding
|
Barriers to funding | Ordered logit | 77.1s (~1.29 minutes) |
| Latent human rights | |||
m_lhr_baseline
|
Baseline | OLS | 13.227s |
m_lhr_total
|
Total legal barriers | OLS | 13.997s |
m_lhr_total_new
|
New legal barriers | OLS | 13.137s |
m_lhr_advocacy
|
Barriers to advocacy | OLS | 15.639s |
m_lhr_entry
|
Barriers to entry | OLS | 13.714s |
m_lhr_funding
|
Barriers to funding | OLS | 14.51s |
| Total | |||
| 552.663s (~9.21 minutes) | |||
model_time_e1 <- models_e1 %>%
filter(training == "Training") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
total_row_e1 <- tibble(Outcome = "Total",
`Total time (i.e. longest chain)` =
as.duration(sum(model_time_e1$`Total time (i.e. longest chain)`)))
model_time_e1 <- model_time_e1 %>%
bind_rows(total_row_e1)
model_time_e1 %>%
select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e1$Outcome))) %>%
kable_styling()| Model | Main predictor | Family | Total time (i.e. longest chain) |
|---|---|---|---|
| Political terror | |||
m_pts_baseline_train
|
Baseline | Ordered logit | 69.16s (~1.15 minutes) |
m_pts_total_train
|
Total legal barriers | Ordered logit | 66.243s (~1.1 minutes) |
m_pts_advocacy_train
|
Barriers to advocacy | Ordered logit | 71.552s (~1.19 minutes) |
m_pts_entry_train
|
Barriers to entry | Ordered logit | 66.969s (~1.12 minutes) |
m_pts_funding_train
|
Barriers to funding | Ordered logit | 67.204s (~1.12 minutes) |
| Latent human rights | |||
m_lhr_baseline_train
|
Baseline | OLS | 11.832s |
m_lhr_total_train
|
Total legal barriers | OLS | 12.77s |
m_lhr_total_new_train
|
New legal barriers | OLS | 12.13s |
m_lhr_advocacy_train
|
Barriers to advocacy | OLS | 15.035s |
m_lhr_entry_train
|
Barriers to entry | OLS | 14.105s |
m_lhr_funding_train
|
Barriers to funding | OLS | 13.531s |
| Total | |||
| 420.531s (~7.01 minutes) | |||
models_e2 <- model_df %>%
filter(str_detect(model, "baseline") | str_detect(model, "v2csreprss")) %>%
mutate(actual_model = model %>% map(~eval(rlang::sym(.)))) %>%
mutate(across(c(outcome_var, explan_var, family), ~fct_inorder(., ordered = TRUE)))
model_time_e2 <- models_e2 %>%
filter(training == "Full data") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
total_row_e2 <- tibble(Outcome = "Total",
`Total time (i.e. longest chain)` =
as.duration(sum(model_time_e2$`Total time (i.e. longest chain)`)))
model_time_e2 <- model_time_e2 %>%
bind_rows(total_row_e2)
model_time_e2 %>%
select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e2$Outcome))) %>%
kable_styling()| Model | Main predictor | Family | Total time (i.e. longest chain) |
|---|---|---|---|
| Political terror | |||
m_pts_baseline
|
Baseline | Ordered logit | 80.659s (~1.34 minutes) |
m_pts_v2csreprss
|
Civil society repression | Ordered logit | 85.602s (~1.43 minutes) |
| Latent human rights | |||
m_lhr_baseline
|
Baseline | OLS | 13.227s |
m_lhr_v2csreprss
|
Civil society repression | OLS | 14.233s |
| Total | |||
| 193.721s (~3.23 minutes) | |||
model_time_e2 <- models_e2 %>%
filter(training == "Training") %>%
mutate(duration = map(actual_model, ~rstan::get_elapsed_time(.$fit)),
duration = map(duration, ~rownames_to_column(as_tibble(.)))) %>%
select(-actual_model) %>%
unnest(duration) %>%
mutate(model = glue("<code>{model}</code>")) %>%
group_by(Model = model, Outcome = outcome_var,
`Main predictor` = explan_var, `Family` = family) %>%
summarize(`Total time (i.e. longest chain)` = as.duration(max(warmup + sample))) %>%
ungroup() %>%
arrange(Outcome, `Main predictor`)
total_row_e2 <- tibble(Outcome = "Total",
`Total time (i.e. longest chain)` =
as.duration(sum(model_time_e2$`Total time (i.e. longest chain)`)))
model_time_e2 <- model_time_e2 %>%
bind_rows(total_row_e2)
model_time_e2 %>%
select(-Outcome) %>%
kbl(escape = FALSE) %>%
pack_rows(index = table(fct_inorder(model_time_e2$Outcome))) %>%
kable_styling()| Model | Main predictor | Family | Total time (i.e. longest chain) |
|---|---|---|---|
| Political terror | |||
m_pts_baseline_train
|
Baseline | Ordered logit | 69.16s (~1.15 minutes) |
m_pts_v2csreprss_train
|
Civil society repression | Ordered logit | 74.994s (~1.25 minutes) |
| Latent human rights | |||
m_lhr_baseline_train
|
Baseline | OLS | 11.832s |
m_lhr_v2csreprss_train
|
Civil society repression | OLS | 16.914s |
| Total | |||
| 172.9s (~2.88 minutes) | |||
All the models are run with a targets pipeline with dataset-specific functions that live in these files:
R/models_pts.R (for PTS-based models)R/models_lhr.R (for latent human rights-based models)To keep the analysis relatively self contained here in the analysis notebook, and to make it so there’s no need to hunt through the GitHub repository to find the actual code, here’s that code:
R/models_pts.R# Settings ----------------------------------------------------------------
# Run this inside each model function instead of outside so that future workers
# use these options internally
pts_setup <- function() {
options(worker_options)
# Settings
CHAINS <- 4
ITER <- 2000
WARMUP <- 1000
BAYES_SEED <- 2009 # From random.org
threads <- getOption("mc.threads")
# Priors
priors_vague <- c(set_prior("normal(0, 3)", class = "Intercept"),
set_prior("normal(0, 3)", class = "b"),
set_prior("cauchy(0, 1)", class = "sd"))
return(list(chains = CHAINS, iter = ITER, warmup = WARMUP, seed = BAYES_SEED,
threads = threads, priors_vague = priors_vague))
}
# Regular models ----------------------------------------------------------
f_pts_baseline <- function(dat) {
pts_settings <- pts_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(PTS_factor_lead1 ~ PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
f_pts_total <- function(dat) {
pts_settings <- pts_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(PTS_factor_lead1 ~ barriers_total + barriers_total_lag1 +
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
f_pts_total_new <- function(dat) {
pts_settings <- pts_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(PTS_factor_lead1 ~ barriers_total_new + barriers_total_new_lag1 +
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
f_pts_advocacy <- function(dat) {
pts_settings <- pts_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(PTS_factor_lead1 ~ advocacy + advocacy_lag1 +
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
f_pts_entry <- function(dat) {
pts_settings <- pts_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(PTS_factor_lead1 ~ entry + entry_lag1 +
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
f_pts_funding <- function(dat) {
pts_settings <- pts_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(PTS_factor_lead1 ~ funding + funding_lag1 +
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}
f_pts_v2csreprss <- function(dat) {
pts_settings <- pts_setup()
model <- brm(
bf(PTS_factor_lead1 ~ v2csreprss + v2csreprss_lag1 +
PTS_factor +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = cumulative(),
prior = pts_settings$priors_vague,
data = dat,
threads = threading(pts_settings$threads),
chains = pts_settings$chains, iter = pts_settings$iter,
warmup = pts_settings$warmup, seed = pts_settings$seed)
return(model)
}R/models_lhr.R# Settings ----------------------------------------------------------------
lhr_setup <- function() {
options(worker_options)
# Settings
CHAINS <- 4
ITER <- 2000
WARMUP <- 1000
BAYES_SEED <- 4045 # From random.org
threads <- getOption("mc.threads")
# Priors
priors_vague <- c(set_prior("normal(0, 10)", class = "Intercept"),
set_prior("normal(0, 3)", class = "b"),
set_prior("cauchy(0, 1)", class = "sd"))
return(list(chains = CHAINS, iter = ITER, warmup = WARMUP, seed = BAYES_SEED,
threads = threads, priors_vague = priors_vague))
}
# Regular models ----------------------------------------------------------
f_lhr_baseline <- function(dat) {
lhr_settings <- lhr_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(latent_hr_mean_lead1 ~ latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
f_lhr_total <- function(dat) {
lhr_settings <- lhr_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(latent_hr_mean_lead1 ~ barriers_total + barriers_total_lag1 +
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
f_lhr_total_new <- function(dat) {
lhr_settings <- lhr_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(latent_hr_mean_lead1 ~ barriers_total_new + barriers_total_new_lag1 +
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
f_lhr_advocacy <- function(dat) {
lhr_settings <- lhr_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(latent_hr_mean_lead1 ~ advocacy + advocacy_lag1 +
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
f_lhr_entry <- function(dat) {
lhr_settings <- lhr_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(latent_hr_mean_lead1 ~ entry + entry_lag1 +
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
f_lhr_funding <- function(dat) {
lhr_settings <- lhr_setup()
dat <- dat %>% filter(laws)
model <- brm(
bf(latent_hr_mean_lead1 ~ funding + funding_lag1 +
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}
f_lhr_v2csreprss <- function(dat) {
lhr_settings <- lhr_setup()
model <- brm(
bf(latent_hr_mean_lead1 ~ v2csreprss + v2csreprss_lag1 +
latent_hr_mean +
v2x_polyarchy +
gdpcap_log +
un_trade_pct_gdp +
armed_conflict +
(1 | gwcode)
),
family = gaussian(),
prior = lhr_settings$priors_vague,
control = list(adapt_delta = 0.9),
data = dat,
threads = threading(lhr_settings$threads),
chains = lhr_settings$chains, iter = lhr_settings$iter,
warmup = lhr_settings$warmup, seed = lhr_settings$seed)
return(model)
}