library(tidyverse)
library(targets)
library(yardstick)
library(broom)
library(patchwork)
library(scales)
library(here)

# Load data
# Need to use this withr thing because tar_read() and tar_load() need to see the
# _targets folder in the current directory, but this .Rmd file is in a subfolder
withr::with_dir(here::here(), {
  source(tar_read(plot_funs))
  
  canary_testing_lagged <- tar_read(panel_testing_lagged)
  
  # Load big list of models
  model_df <- tar_read(model_df) %>% 
    filter(str_detect(model, "baseline") | str_detect(model, "v2csreprss"))
  
  # Load actual model objects
  tar_load(c(m_pts_baseline_train, m_pts_v2csreprss_train,
             m_pts_total_train, m_pts_advocacy_train, 
             m_pts_entry_train, m_pts_funding_train,
             m_lhr_baseline_train, m_lhr_v2csreprss_train,
             m_lhr_total_train, m_lhr_advocacy_train, 
             m_lhr_entry_train, m_lhr_funding_train))
})

pts_levels <- levels(canary_testing_lagged$PTS_factor)

# Returns a data frame of predicted probabilities with actual = 1 
# when the response outcome happens
match_actual <- function(x, pred, actual) {
  pred %>% 
    select(fitted = {{x}}) %>% 
    mutate(actual = as.numeric(actual == colnames(pred[x])),
           plot_level = colnames(pred[x]))
}

E1a: NGO laws and political terror

seed_pts_prediction <- 5494  # From random.org

set.seed(seed_pts_prediction)
pred_pts_baseline <- predict(
  m_pts_baseline_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_total <- predict(
  m_pts_total_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_advocacy <- predict(
  m_pts_advocacy_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_entry <- predict(
  m_pts_entry_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_funding <- predict(
  m_pts_funding_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

Separation plots

Baseline models

# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_baseline), 
                         FUN = match_actual, 
                         pred = pred_pts_baseline, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_baseline)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")

Models with NGO laws

Barriers to advocacy

# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_advocacy), 
                         FUN = match_actual, 
                         pred = pred_pts_advocacy, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_advocacy)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")

Barriers to entry

# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_entry), 
                         FUN = match_actual, 
                         pred = pred_pts_entry, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_entry)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")

Barriers to funding

# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_funding), 
                         FUN = match_actual, 
                         pred = pred_pts_funding, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_funding)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")

Prediction diagnostics

pred_pts_baseline_categories <- pred_pts_baseline %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_total_categories <- pred_pts_total %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_advocacy_categories <- pred_pts_advocacy %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_entry_categories <- pred_pts_entry %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_funding_categories <- pred_pts_funding %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)


pts_cat_preds <- canary_testing_lagged %>% 
  select(country, year, actual = PTS_factor_lead1) %>% 
  mutate(pred_baseline = pred_pts_baseline_categories$predicted,
         pred_total = pred_pts_total_categories$predicted,
         pred_advocacy = pred_pts_advocacy_categories$predicted,
         pred_entry = pred_pts_entry_categories$predicted,
         pred_funding = pred_pts_funding_categories$predicted) %>% 
  filter(!is.na(actual)) %>%
  mutate(is_correct_baseline = actual == pred_baseline,
         is_correct_total = actual == pred_total,
         is_correct_advocacy = actual == pred_advocacy,
         is_correct_entry = actual == pred_entry,
         is_correct_funding = actual == pred_funding)

# Improved predictions
pts_improved_total <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_total) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_total,
         `Baseline` = pred_baseline)

pts_improved_advocacy <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_advocacy) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_advocacy,
         `Baseline` = pred_baseline)

pts_improved_entry <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_entry) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_entry,
         `Baseline` = pred_baseline)

pts_improved_funding <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_funding) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_funding,
         `Baseline` = pred_baseline)

pts_improved_total %>% 
  kbl(caption = "Cases improved with total legal barriers") %>% 
  kable_styling()
Cases improved with total legal barriers
Country Year Actual CS repression included Baseline
Guatemala 2013 Level 3 Level 3 Level 2
Kosovo 2012 Level 1 Level 1 Level 2
Greece 2011 Level 2 Level 2 Level 3
Sierra Leone 2011 Level 3 Level 3 Level 2
Congo - Brazzaville 2011 Level 3 Level 3 Level 2
Somalia 2013 Level 5 Level 5 Level 4
Bahrain 2011 Level 3 Level 3 Level 2
Bahrain 2013 Level 3 Level 3 Level 2
pts_improved_advocacy %>% 
  kbl(caption = "Cases improved with barriers to advocacy") %>% 
  kable_styling()
Cases improved with barriers to advocacy
Country Year Actual CS repression included Baseline
Guatemala 2013 Level 3 Level 3 Level 2
Bulgaria 2012 Level 2 Level 2 Level 1
Congo - Brazzaville 2011 Level 3 Level 3 Level 2
Bahrain 2011 Level 3 Level 3 Level 2
Bahrain 2013 Level 3 Level 3 Level 2
pts_improved_entry %>% 
  kbl(caption = "Cases improved with barriers to entry") %>% 
  kable_styling()
Cases improved with barriers to entry
Country Year Actual CS repression included Baseline
Guatemala 2013 Level 3 Level 3 Level 2
Kosovo 2012 Level 1 Level 1 Level 2
Greece 2011 Level 2 Level 2 Level 3
Sierra Leone 2011 Level 3 Level 3 Level 2
Bahrain 2013 Level 3 Level 3 Level 2
pts_improved_funding %>% 
  kbl(caption = "Cases improved with barriers to funding") %>% 
  kable_styling()
Cases improved with barriers to funding
Country Year Actual CS repression included Baseline
Guatemala 2013 Level 3 Level 3 Level 2
Bulgaria 2012 Level 2 Level 2 Level 1
Sierra Leone 2011 Level 3 Level 3 Level 2
Congo - Brazzaville 2011 Level 3 Level 3 Level 2
Tunisia 2013 Level 3 Level 3 Level 2
Bahrain 2011 Level 3 Level 3 Level 2
Bahrain 2013 Level 3 Level 3 Level 2
correct_lookup <- tribble(
  ~name, ~nice_name,
  "is_correct_total", "Total legal barriers included",
  "is_correct_advocacy", "Barriers to advocacy included",
  "is_correct_entry", "Barriers to entry included",
  "is_correct_funding", "Barriers to funding included",
  "is_correct_baseline", "Baseline"
) %>% mutate(nice_name = fct_inorder(nice_name, ordered = TRUE))

pts_pred_table <- pts_cat_preds %>% 
  pivot_longer(starts_with("is_correct")) %>% 
  group_by(name, value) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  left_join(correct_lookup, by = "name") %>% 
  mutate(Prediction = ifelse(value == TRUE, "Correct", "Wrong")) %>% 
  arrange(nice_name) %>% 
  select(-name, -value) %>% 
  pivot_wider(names_from = "nice_name", values_from = "n")

pts_pred_table %>% 
  kbl(align = "lccccc") %>% 
  kable_styling()
Prediction Total legal barriers included Barriers to advocacy included Barriers to entry included Barriers to funding included Baseline
Wrong 107 110 108 108 109
Correct 378 375 377 377 376

 

E1b: NGO laws and latent human rights

seed_lhr_prediction <- 3207  # From random.org

set.seed(seed_lhr_prediction)
pred_lhr_baseline <- predict(m_lhr_baseline_train, 
                             newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                             allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_total <- predict(m_lhr_total_train, 
                          newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                          allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_advocacy <- predict(m_lhr_advocacy_train, 
                             newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                             allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_entry <- predict(m_lhr_entry_train, 
                          newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                          allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_funding <- predict(m_lhr_funding_train, 
                            newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                            allow_new_levels = TRUE)

pred_lhr_baseline_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_baseline)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_total_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_total)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_advocacy_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_advocacy)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_entry_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_entry)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_funding_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_funding)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

Improvement from baseline models

plot_baseline <- ggplot(pred_lhr_baseline_test, 
                        aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Baseline",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_total <- ggplot(pred_lhr_total_test, 
                     aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Total legal barriers",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_advocacy <- ggplot(pred_lhr_advocacy_test, 
                        aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Barriers to advocacy",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_entry <- ggplot(pred_lhr_entry_test, 
                     aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Barriers to entry",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_funding <- ggplot(pred_lhr_funding_test, 
                       aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Barriers to funding",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

(plot_baseline | plot_total) / (plot_advocacy | plot_entry | plot_funding)

lotsa_metrics <- metric_set(rmse, rsq, mae, ccc)

bind_rows(lotsa_metrics(pred_lhr_baseline_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Baseline"),
          lotsa_metrics(pred_lhr_total_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Total legal barriers"),
          lotsa_metrics(pred_lhr_advocacy_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Barriers to advocacy"),
          lotsa_metrics(pred_lhr_entry_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Barriers to entry"),
          lotsa_metrics(pred_lhr_funding_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Barriers to funding")) %>% 
  select(Model, .metric, .estimate) %>% 
  pivot_wider(names_from = ".metric", values_from = ".estimate")
Model rmse rsq mae ccc
Baseline 0.1862156 0.9822788 0.1080593 0.9887926
Total legal barriers 0.1872181 0.9819975 0.1085471 0.9886937
Barriers to advocacy 0.1872011 0.9819955 0.1088119 0.9886847
Barriers to entry 0.1862635 0.9821720 0.1075320 0.9887725
Barriers to funding 0.1879768 0.9818439 0.1094280 0.9886148

Most improved countries

Barriers to advocacy

most_improved_lhr_advocacy <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_advocacy_test %>% 
    mutate(error_advocacy = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_advocacy = Estimate, error_advocacy)
) %>% 
  mutate(improved = error_advocacy < 0 & abs(error_advocacy) < abs(error_baseline)) %>% 
  mutate(diff = error_advocacy - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_advocacy)
## [1] 133
most_improved_lhr_advocacy %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_advocacy,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
Country Year Actual latent human rights value (t + 1) Predicted (baseline) Predicted (CS repression) |CS repression − baseline|
Belarus 2012 0.208 0.245 0.207 0.037
North Korea 2012 -1.669 -1.637 -1.670 0.033
Cuba 2012 -0.078 -0.054 -0.087 0.033
Algeria 2012 0.469 0.484 0.455 0.029
Bhutan 2013 1.728 1.492 1.515 0.023

Barriers to entry

most_improved_lhr_entry <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_entry_test %>% 
    mutate(error_entry = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_entry = Estimate, error_entry)
) %>% 
  mutate(improved = error_entry < 0 & abs(error_entry) < abs(error_baseline)) %>% 
  mutate(diff = error_entry - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_entry)
## [1] 116
most_improved_lhr_entry %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_entry,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
Country Year Actual latent human rights value (t + 1) Predicted (baseline) Predicted (CS repression) |CS repression − baseline|
Tunisia 2011 0.261 0.149 0.174 0.026
New Zealand 2012 3.605 3.536 3.559 0.023
Cuba 2013 -0.072 -0.061 -0.073 0.012
Syria 2012 -2.156 -2.208 -2.196 0.012
Solomon Islands 2013 4.001 3.911 3.923 0.012

Barriers to funding

most_improved_lhr_funding <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_funding_test %>% 
    mutate(error_funding = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_funding = Estimate, error_funding)
) %>% 
  mutate(improved = error_funding < 0 & abs(error_funding) < abs(error_baseline)) %>% 
  mutate(diff = error_funding - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_funding)
## [1] 150
most_improved_lhr_funding %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_funding,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
Country Year Actual latent human rights value (t + 1) Predicted (baseline) Predicted (CS repression) |CS repression − baseline|
North Korea 2013 -1.687 -1.632 -1.695 0.063
Cuba 2011 -0.069 -0.021 -0.078 0.057
Eritrea 2011 -1.314 -1.261 -1.318 0.057
Eritrea 2013 -1.341 -1.298 -1.355 0.057
Eritrea 2012 -1.319 -1.288 -1.345 0.057

 


 

E2a: Civil society environment and political terror

seed_pts_prediction <- 5494  # From random.org

set.seed(seed_pts_prediction)
pred_pts_baseline <- predict(
  m_pts_baseline_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_v2csreprss <- predict(
  m_pts_v2csreprss_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

Separation plots

Baseline model

# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_baseline), 
                         FUN = match_actual, 
                         pred = pred_pts_baseline, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_baseline)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")

Model with civil society repression

pred_long_list <- lapply(1:ncol(pred_pts_v2csreprss), 
                         FUN = match_actual, 
                         pred = pred_pts_v2csreprss, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_v2csreprss)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")

Prediction diagnostics

pred_pts_baseline_categories <- pred_pts_baseline %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_v2csreprss_categories <- pred_pts_v2csreprss %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pts_cat_preds <- canary_testing_lagged %>% 
  select(country, year, actual = PTS_factor_lead1) %>% 
  mutate(pred_baseline = pred_pts_baseline_categories$predicted,
         pred_v2csreprss = pred_pts_v2csreprss_categories$predicted) %>% 
  filter(!is.na(actual)) %>%
  mutate(is_correct_baseline = actual == pred_baseline,
         is_correct_v2csreprss = actual == pred_v2csreprss)

# Improved predictions
pts_improved <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_v2csreprss) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_v2csreprss,
         `Baseline` = pred_baseline)

pts_improved %>% 
  kbl() %>% 
  kable_styling()
Country Year Actual CS repression included Baseline
Guatemala 2013 Level 3 Level 3 Level 2
Kosovo 2012 Level 1 Level 1 Level 2
Congo - Brazzaville 2011 Level 3 Level 3 Level 2
Libya 2011 Level 4 Level 4 Level 5
Bahrain 2011 Level 3 Level 3 Level 2
Bahrain 2013 Level 3 Level 3 Level 2
correct_lookup <- tribble(
  ~name, ~nice_name,
  "is_correct_v2csreprss", "CS repression included",
  "is_correct_baseline", "Baseline"
) %>% mutate(nice_name = fct_inorder(nice_name, ordered = TRUE))

pts_pred_table <- pts_cat_preds %>% 
  pivot_longer(starts_with("is_correct")) %>% 
  group_by(name, value) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  left_join(correct_lookup, by = "name") %>% 
  mutate(Prediction = ifelse(value == TRUE, "Correct", "Wrong")) %>% 
  arrange(nice_name) %>% 
  select(-name, -value) %>% 
  pivot_wider(names_from = "nice_name", values_from = "n")

pts_pred_table %>% 
  kbl(align = "lcc") %>% 
  kable_styling()
Prediction CS repression included Baseline
Wrong 111 109
Correct 374 376

 

E2b: Civil society environment and latent human rights

seed_lhr_prediction <- 3207  # From random.org

set.seed(seed_lhr_prediction)
pred_lhr_baseline <- predict(m_lhr_baseline_train, 
                                newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                                allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_v2csreprss <- predict(m_lhr_v2csreprss_train, 
                                  newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                                  allow_new_levels = TRUE)

pred_lhr_baseline_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_baseline)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_v2csreprss_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_v2csreprss)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

Improvement from baseline models

plot_baseline <- ggplot(pred_lhr_baseline_test, 
                        aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Baseline",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_v2csreprss <- ggplot(pred_lhr_v2csreprss_test, 
                          aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Added predictor",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_baseline | plot_v2csreprss

lotsa_metrics <- metric_set(rmse, rsq, mae, ccc)

bind_rows(lotsa_metrics(pred_lhr_baseline_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Baseline"),
          lotsa_metrics(pred_lhr_v2csreprss_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Added predictor")) %>% 
  select(Model, .metric, .estimate) %>% 
  pivot_wider(names_from = ".metric", values_from = ".estimate")
Model rmse rsq mae ccc
Baseline 0.1862156 0.9822788 0.1080593 0.9887926
Added predictor 0.1857038 0.9823449 0.1077390 0.9888036

Most improved countries

most_improved_lhr <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_v2csreprss_test %>% 
    mutate(error_v2csreprss = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_v2csreprss = Estimate, error_v2csreprss)
) %>% 
  # mutate(across(starts_with("error"), ~round(., 4))) %>% 
  mutate(improved = error_v2csreprss < 0 & abs(error_v2csreprss) < abs(error_baseline)) %>% 
  mutate(diff = error_v2csreprss - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr)
## [1] 127
most_improved_lhr %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_v2csreprss,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
Country Year Actual latent human rights value (t + 1) Predicted (baseline) Predicted (CS repression) |CS repression − baseline|
Libya 2011 -1.395 -1.818 -1.676 0.142
Fiji 2013 1.672 1.377 1.428 0.051
Mauritania 2013 0.558 0.595 0.555 0.040
Colombia 2011 -1.013 -1.090 -1.051 0.039
Slovakia 2011 2.019 1.908 1.943 0.035
---
title: "Predictions"
author: "Suparna Chaudhry and Andrew Heiss"
date: "Last run: `r format(Sys.time(), '%F')`"
output: 
  html_document:
    code_folding: hide
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
library(knitr)
library(kableExtra)
knit_print.data.frame <- function(x, ...) {
  res <- paste(c('', '', kable_styling(kable(x, booktabs = TRUE))), collapse = '\n')
  asis_output(res)
}

registerS3method("knit_print", "data.frame", knit_print.data.frame)
registerS3method("knit_print", "grouped_df", knit_print.data.frame)

knitr::opts_chunk$set(fig.retina = 3,
                      tidy.opts = list(width.cutoff = 120),  # For code
                      options(width = 90),  # For output
                      fig.asp = 0.618, fig.width = 7, 
                      fig.align = "center", out.width = "85%")

options(dplyr.summarise.inform = FALSE,
        knitr.kable.NA = "")
```

```{r load-libraries-data, message=FALSE, warning=FALSE}
library(tidyverse)
library(targets)
library(yardstick)
library(broom)
library(patchwork)
library(scales)
library(here)

# Load data
# Need to use this withr thing because tar_read() and tar_load() need to see the
# _targets folder in the current directory, but this .Rmd file is in a subfolder
withr::with_dir(here::here(), {
  source(tar_read(plot_funs))
  
  canary_testing_lagged <- tar_read(panel_testing_lagged)
  
  # Load big list of models
  model_df <- tar_read(model_df) %>% 
    filter(str_detect(model, "baseline") | str_detect(model, "v2csreprss"))
  
  # Load actual model objects
  tar_load(c(m_pts_baseline_train, m_pts_v2csreprss_train,
             m_pts_total_train, m_pts_advocacy_train, 
             m_pts_entry_train, m_pts_funding_train,
             m_lhr_baseline_train, m_lhr_v2csreprss_train,
             m_lhr_total_train, m_lhr_advocacy_train, 
             m_lhr_entry_train, m_lhr_funding_train))
})

pts_levels <- levels(canary_testing_lagged$PTS_factor)

# Returns a data frame of predicted probabilities with actual = 1 
# when the response outcome happens
match_actual <- function(x, pred, actual) {
  pred %>% 
    select(fitted = {{x}}) %>% 
    mutate(actual = as.numeric(actual == colnames(pred[x])),
           plot_level = colnames(pred[x]))
}
```

# E~1a~: NGO laws and political terror

```{r e1a-predictions, warning=FALSE}
seed_pts_prediction <- 5494  # From random.org

set.seed(seed_pts_prediction)
pred_pts_baseline <- predict(
  m_pts_baseline_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_total <- predict(
  m_pts_total_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_advocacy <- predict(
  m_pts_advocacy_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_entry <- predict(
  m_pts_entry_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_funding <- predict(
  m_pts_funding_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)
```

## Separation plots

### Baseline models

```{r e1a-separation-baseline}
# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_baseline), 
                         FUN = match_actual, 
                         pred = pred_pts_baseline, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_baseline)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

### Models with NGO laws

#### Total legal barriers

```{r e1a-separation-total}
# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_total), 
                         FUN = match_actual, 
                         pred = pred_pts_total, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_total)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

#### Barriers to advocacy

```{r e1a-separation-advocacy}
# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_advocacy), 
                         FUN = match_actual, 
                         pred = pred_pts_advocacy, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_advocacy)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

#### Barriers to entry

```{r e1a-separation-entry}
# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_entry), 
                         FUN = match_actual, 
                         pred = pred_pts_entry, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_entry)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

#### Barriers to funding

```{r e1a-separation-funding}
# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_funding), 
                         FUN = match_actual, 
                         pred = pred_pts_funding, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_funding)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

## Prediction diagnostics

```{r e1a-improved-cases}
pred_pts_baseline_categories <- pred_pts_baseline %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_total_categories <- pred_pts_total %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_advocacy_categories <- pred_pts_advocacy %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_entry_categories <- pred_pts_entry %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_funding_categories <- pred_pts_funding %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)


pts_cat_preds <- canary_testing_lagged %>% 
  select(country, year, actual = PTS_factor_lead1) %>% 
  mutate(pred_baseline = pred_pts_baseline_categories$predicted,
         pred_total = pred_pts_total_categories$predicted,
         pred_advocacy = pred_pts_advocacy_categories$predicted,
         pred_entry = pred_pts_entry_categories$predicted,
         pred_funding = pred_pts_funding_categories$predicted) %>% 
  filter(!is.na(actual)) %>%
  mutate(is_correct_baseline = actual == pred_baseline,
         is_correct_total = actual == pred_total,
         is_correct_advocacy = actual == pred_advocacy,
         is_correct_entry = actual == pred_entry,
         is_correct_funding = actual == pred_funding)

# Improved predictions
pts_improved_total <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_total) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_total,
         `Baseline` = pred_baseline)

pts_improved_advocacy <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_advocacy) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_advocacy,
         `Baseline` = pred_baseline)

pts_improved_entry <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_entry) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_entry,
         `Baseline` = pred_baseline)

pts_improved_funding <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_funding) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_funding,
         `Baseline` = pred_baseline)

pts_improved_total %>% 
  kbl(caption = "Cases improved with total legal barriers") %>% 
  kable_styling()

pts_improved_advocacy %>% 
  kbl(caption = "Cases improved with barriers to advocacy") %>% 
  kable_styling()

pts_improved_entry %>% 
  kbl(caption = "Cases improved with barriers to entry") %>% 
  kable_styling()

pts_improved_funding %>% 
  kbl(caption = "Cases improved with barriers to funding") %>% 
  kable_styling()
```

```{r e1a-correct-not-correct}
correct_lookup <- tribble(
  ~name, ~nice_name,
  "is_correct_total", "Total legal barriers included",
  "is_correct_advocacy", "Barriers to advocacy included",
  "is_correct_entry", "Barriers to entry included",
  "is_correct_funding", "Barriers to funding included",
  "is_correct_baseline", "Baseline"
) %>% mutate(nice_name = fct_inorder(nice_name, ordered = TRUE))

pts_pred_table <- pts_cat_preds %>% 
  pivot_longer(starts_with("is_correct")) %>% 
  group_by(name, value) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  left_join(correct_lookup, by = "name") %>% 
  mutate(Prediction = ifelse(value == TRUE, "Correct", "Wrong")) %>% 
  arrange(nice_name) %>% 
  select(-name, -value) %>% 
  pivot_wider(names_from = "nice_name", values_from = "n")

pts_pred_table %>% 
  kbl(align = "lccccc") %>% 
  kable_styling()
```

\ 

# E~1b~: NGO laws and latent human rights

```{r e1d-predictions, warning=FALSE}
seed_lhr_prediction <- 3207  # From random.org

set.seed(seed_lhr_prediction)
pred_lhr_baseline <- predict(m_lhr_baseline_train, 
                             newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                             allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_total <- predict(m_lhr_total_train, 
                          newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                          allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_advocacy <- predict(m_lhr_advocacy_train, 
                             newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                             allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_entry <- predict(m_lhr_entry_train, 
                          newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                          allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_funding <- predict(m_lhr_funding_train, 
                            newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                            allow_new_levels = TRUE)

pred_lhr_baseline_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_baseline)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_total_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_total)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_advocacy_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_advocacy)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_entry_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_entry)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_funding_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_funding)) %>% 
  filter(!is.na(latent_hr_mean_lead1))
```

## Improvement from baseline models

```{r e1d-pred-plots, warning=FALSE}
plot_baseline <- ggplot(pred_lhr_baseline_test, 
                        aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Baseline",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_total <- ggplot(pred_lhr_total_test, 
                     aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Total legal barriers",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_advocacy <- ggplot(pred_lhr_advocacy_test, 
                        aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Barriers to advocacy",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_entry <- ggplot(pred_lhr_entry_test, 
                     aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Barriers to entry",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_funding <- ggplot(pred_lhr_funding_test, 
                       aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Barriers to funding",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

(plot_baseline | plot_total) / (plot_advocacy | plot_entry | plot_funding)
```

```{r e1d-metrics}
lotsa_metrics <- metric_set(rmse, rsq, mae, ccc)

bind_rows(lotsa_metrics(pred_lhr_baseline_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Baseline"),
          lotsa_metrics(pred_lhr_total_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Total legal barriers"),
          lotsa_metrics(pred_lhr_advocacy_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Barriers to advocacy"),
          lotsa_metrics(pred_lhr_entry_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Barriers to entry"),
          lotsa_metrics(pred_lhr_funding_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Barriers to funding")) %>% 
  select(Model, .metric, .estimate) %>% 
  pivot_wider(names_from = ".metric", values_from = ".estimate")
```

## Most improved countries

### Total legal barriers

```{r e1d-most-improved-total}
most_improved_lhr_total <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_total_test %>% 
    mutate(error_total = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_total = Estimate, error_total)
) %>% 
  mutate(improved = error_total < 0 & abs(error_total) < abs(error_baseline)) %>% 
  mutate(diff = error_total - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_total)

most_improved_lhr_total %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_total,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
```

### Barriers to advocacy

```{r e1d-most-improved-advocacy}
most_improved_lhr_advocacy <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_advocacy_test %>% 
    mutate(error_advocacy = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_advocacy = Estimate, error_advocacy)
) %>% 
  mutate(improved = error_advocacy < 0 & abs(error_advocacy) < abs(error_baseline)) %>% 
  mutate(diff = error_advocacy - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_advocacy)

most_improved_lhr_advocacy %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_advocacy,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
```

### Barriers to entry

```{r e1d-most-improved-entry}
most_improved_lhr_entry <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_entry_test %>% 
    mutate(error_entry = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_entry = Estimate, error_entry)
) %>% 
  mutate(improved = error_entry < 0 & abs(error_entry) < abs(error_baseline)) %>% 
  mutate(diff = error_entry - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_entry)

most_improved_lhr_entry %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_entry,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
```

### Barriers to funding

```{r e1d-most-improved-funding}
most_improved_lhr_funding <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_funding_test %>% 
    mutate(error_funding = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_funding = Estimate, error_funding)
) %>% 
  mutate(improved = error_funding < 0 & abs(error_funding) < abs(error_baseline)) %>% 
  mutate(diff = error_funding - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr_funding)

most_improved_lhr_funding %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_funding,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
```

\ 

---

\ 

# E~2a~: Civil society environment and political terror

```{r e2a-predictions, warning=FALSE}
seed_pts_prediction <- 5494  # From random.org

set.seed(seed_pts_prediction)
pred_pts_baseline <- predict(
  m_pts_baseline_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)

set.seed(seed_pts_prediction)
pred_pts_v2csreprss <- predict(
  m_pts_v2csreprss_train, 
  newdata = select(canary_testing_lagged, -PTS_factor_lead1),
  allow_new_levels = TRUE
) %>% 
  as_tibble() %>% 
  magrittr::set_colnames(pts_levels)
```


## Separation plots

### Baseline model

```{r e2a-separation-baseline}
# Match each column in pred_mat (predicted probabilities for each response level)
pred_long_list <- lapply(1:ncol(pred_pts_baseline), 
                         FUN = match_actual, 
                         pred = pred_pts_baseline, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_baseline)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

### Model with civil society repression

```{r e2a-separation-cs}
pred_long_list <- lapply(1:ncol(pred_pts_v2csreprss), 
                         FUN = match_actual, 
                         pred = pred_pts_v2csreprss, 
                         actual = as.character(canary_testing_lagged$PTS_factor_lead1))

plot_data <- pred_long_list %>% 
  bind_rows() %>% 
  filter(!is.nan(fitted), !is.na(actual)) %>% 
  group_by(plot_level) %>% 
  arrange(plot_level, fitted) %>% 
  mutate(index = row_number()) %>% 
  ungroup()

markers <- plot_data %>% 
  group_by(plot_level) %>%
  summarize(obs = n(), expected = sum(fitted),
            marker = obs - expected + 1)

n_in_group <- nrow(pred_pts_v2csreprss)

plot_data_condensed <- plot_data %>% 
  # rle(), dplyr-style: https://stackoverflow.com/a/33510765/120898
  mutate(rleid = (actual != lag(actual, 1, default = 0))) %>%
  mutate(rleid = cumsum(rleid)) %>% 
  group_by(plot_level, rleid) %>% 
  slice(1) %>% 
  group_by(plot_level) %>% 
  mutate(xmin = index, xmax = lead(index, default = n_in_group)) %>% 
  ungroup()

ggplot(plot_data_condensed) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = 0, ymax = 1, fill = as.factor(actual))) + 
  geom_line(aes(x = index, y = fitted), size = 0.3) +
  geom_point(data = markers, aes(x = marker, y = -0.05), shape = 17, size = 3) + 
  scale_fill_manual(values = c("grey90", "#2ECC40")) +
  guides(fill = "none") +
  facet_wrap(vars(plot_level), ncol = 1, scales = "free_x") +
  theme_void(base_family = "Inter Bold")
```

## Prediction diagnostics

```{r e2a-improved-cases}
pred_pts_baseline_categories <- pred_pts_baseline %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pred_pts_v2csreprss_categories <- pred_pts_v2csreprss %>% 
  rownames_to_column() %>% 
  pivot_longer(-rowname) %>% 
  group_by(rowname) %>% 
  filter(rank(-value, ties.method = "last") == 1) %>% 
  ungroup() %>% 
  select(predicted = name)

pts_cat_preds <- canary_testing_lagged %>% 
  select(country, year, actual = PTS_factor_lead1) %>% 
  mutate(pred_baseline = pred_pts_baseline_categories$predicted,
         pred_v2csreprss = pred_pts_v2csreprss_categories$predicted) %>% 
  filter(!is.na(actual)) %>%
  mutate(is_correct_baseline = actual == pred_baseline,
         is_correct_v2csreprss = actual == pred_v2csreprss)

# Improved predictions
pts_improved <- pts_cat_preds %>% 
  filter(!is_correct_baseline & is_correct_v2csreprss) %>% 
  select(Country = country, Year = year, Actual = actual,
         `CS repression included` = pred_v2csreprss,
         `Baseline` = pred_baseline)

pts_improved %>% 
  kbl() %>% 
  kable_styling()
```

```{r e2a-correct-not-correct}
correct_lookup <- tribble(
  ~name, ~nice_name,
  "is_correct_v2csreprss", "CS repression included",
  "is_correct_baseline", "Baseline"
) %>% mutate(nice_name = fct_inorder(nice_name, ordered = TRUE))

pts_pred_table <- pts_cat_preds %>% 
  pivot_longer(starts_with("is_correct")) %>% 
  group_by(name, value) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  left_join(correct_lookup, by = "name") %>% 
  mutate(Prediction = ifelse(value == TRUE, "Correct", "Wrong")) %>% 
  arrange(nice_name) %>% 
  select(-name, -value) %>% 
  pivot_wider(names_from = "nice_name", values_from = "n")

pts_pred_table %>% 
  kbl(align = "lcc") %>% 
  kable_styling()
```

\ 

# E~2b~: Civil society environment and latent human rights

```{r e2b-predictions, warning=FALSE}
seed_lhr_prediction <- 3207  # From random.org

set.seed(seed_lhr_prediction)
pred_lhr_baseline <- predict(m_lhr_baseline_train, 
                                newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                                allow_new_levels = TRUE)

set.seed(seed_lhr_prediction)
pred_lhr_v2csreprss <- predict(m_lhr_v2csreprss_train, 
                                  newdata = select(canary_testing_lagged, -latent_hr_mean_lead1),
                                  allow_new_levels = TRUE)

pred_lhr_baseline_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_baseline)) %>% 
  filter(!is.na(latent_hr_mean_lead1))

pred_lhr_v2csreprss_test <- canary_testing_lagged %>% 
  select(country, gwcode, year, latent_hr_mean_lead1) %>% 
  bind_cols(as_tibble(pred_lhr_v2csreprss)) %>% 
  filter(!is.na(latent_hr_mean_lead1))
```

## Improvement from baseline models

```{r e2b-pred-plots, warning=FALSE}
plot_baseline <- ggplot(pred_lhr_baseline_test, 
                        aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Baseline",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_v2csreprss <- ggplot(pred_lhr_v2csreprss_test, 
                          aes(x = latent_hr_mean_lead1, y = Estimate)) +
  geom_abline(lty = 2) +
  geom_point(alpha = 0.5) +
  labs(title = "Added predictor",
       x = "Actual latent human rights value (t + 1)",
       y = "Predicted latent human rights value (t + 1)") +
  coord_equal() +
  theme_ngo()

plot_baseline | plot_v2csreprss
```

```{r e2b-metrics}
lotsa_metrics <- metric_set(rmse, rsq, mae, ccc)

bind_rows(lotsa_metrics(pred_lhr_baseline_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Baseline"),
          lotsa_metrics(pred_lhr_v2csreprss_test, 
                        truth = latent_hr_mean_lead1, 
                        estimate = Estimate) %>% 
            mutate(Model = "Added predictor")) %>% 
  select(Model, .metric, .estimate) %>% 
  pivot_wider(names_from = ".metric", values_from = ".estimate")
```

## Most improved countries

```{r e2b-most-improved}
most_improved_lhr <- bind_cols(
  pred_lhr_baseline_test %>% 
    mutate(error_baseline = Estimate - latent_hr_mean_lead1) %>% 
    select(country, gwcode, year, latent_hr_mean_lead1, 
           estimate_baseline = Estimate, error_baseline),
  pred_lhr_v2csreprss_test %>% 
    mutate(error_v2csreprss = Estimate - latent_hr_mean_lead1) %>% 
    select(estimate_v2csreprss = Estimate, error_v2csreprss)
) %>% 
  # mutate(across(starts_with("error"), ~round(., 4))) %>% 
  mutate(improved = error_v2csreprss < 0 & abs(error_v2csreprss) < abs(error_baseline)) %>% 
  mutate(diff = error_v2csreprss - error_baseline) %>% 
  mutate(abs_diff = abs(diff)) %>% 
  filter(improved) %>% 
  arrange(desc(abs_diff))

nrow(most_improved_lhr)

most_improved_lhr %>% 
  select(Country = country, Year = year, 
         `Actual latent human rights value (t + 1)` = latent_hr_mean_lead1,
         `Predicted (baseline)` = estimate_baseline,
         `Predicted (CS repression)` = estimate_v2csreprss,
         `|CS repression − baseline|` = abs_diff) %>% 
  head(5) %>% 
  kbl(digits = 3, align = "lccccc") %>% 
  kable_styling()
```
