Data and code

Replication data and code available at GitHub.

library(tidyverse)
library(forcats)
library(stringr)
library(ggstance)
library(ggforce)
library(maptools)
library(rgdal)
library(scales)
library(zoo)

# Useful functions
# Base ggplot2 theme for the GPA figures.
#
# base_size:   base font size, passed to theme_bw()
# base_family: font family used for every text element of the theme and for
#              the default "label" and "text" geoms
#
# Returns a ggplot2 theme object.
#
# Side effect: every call changes the session-wide defaults of the bar, line,
# label, and text geoms through update_geom_defaults().
theme_gpa <- function(base_size=10, base_family="Clear Sans") {
  update_geom_defaults("bar", list(fill = "grey30"))
  update_geom_defaults("line", list(colour = "grey30"))
  # Use the requested family rather than a hard-coded one so that a caller who
  # overrides base_family gets matching fonts in geoms and theme text alike
  update_geom_defaults("label", list(family=base_family))
  update_geom_defaults("text", list(family=base_family))
  ret <- theme_bw(base_size, base_family) + 
    theme(panel.background = element_rect(fill="#ffffff", colour=NA),
          axis.title.y = element_text(margin = margin(r = 10)),
          axis.title.x = element_text(margin = margin(t = 10)),
          axis.text = element_text(colour="black"),
          title=element_text(vjust=1.2, family=base_family, face="bold"),
          plot.subtitle=element_text(family=base_family),
          plot.caption=element_text(family=base_family,
                                    size=rel(0.8), colour="grey70"),
          panel.border = element_blank(), 
          axis.line=element_line(colour="grey50", size=0.2),
          #panel.grid=element_blank(), 
          axis.ticks=element_blank(),
          legend.position="bottom", 
          legend.title=element_text(size=rel(0.8)),
          axis.title=element_text(size=rel(0.8), family=base_family, face="bold"),
          strip.text=element_text(size=rel(1), family=base_family, face="bold"),
          strip.background=element_rect(fill="#ffffff", colour=NA),
          panel.spacing.y=unit(1.5, "lines"),
          legend.key=element_blank(),
          legend.spacing=unit(0.2, "lines"))
  
  ret
}

# Minimal theme for maps: theme_bw() on a white panel with the panel border,
# axis lines, grid lines, ticks, axis titles, and axis text all removed.
#
# base_size:   base font size, passed to theme_bw()
# base_family: base font family, passed to theme_bw()
#
# Returns a ggplot2 theme object.
theme_blank_map <- function(base_size=9.5, base_family="Clear Sans") {
  nothing <- element_blank()
  white.panel <- element_rect(fill="#ffffff", colour=NA)
  
  theme_bw(base_size, base_family) +
    theme(panel.background = white.panel,
          panel.border = nothing,
          axis.line = nothing,
          panel.grid = nothing,
          axis.ticks = nothing,
          axis.title = nothing,
          axis.text = nothing)
}

# Save a plot twice under the same base name: once as a PDF (cairo_pdf device)
# and once as a 300 dpi PNG (type = "cairo").
#
# fig:           plot object handed to ggsave()
# filepath:      output directory; defaults to file.path(PROJHOME, "Output")
# filename:      base file name, without extension
# width, height: output dimensions, expressed in `units`
# units:         unit for width and height
# seed:          optional; when not NULL, set.seed(seed) is called before each
#                of the two saves (presumably so plots with random elements
#                render identically in both files -- confirm)
# ...:           forwarded to both ggsave() calls
fig.save.cairo <- function(fig, filepath=file.path(PROJHOME, "Output"), 
                           filename, width, height, units="in", seed=NULL, ...) {
  reseed <- !is.null(seed)
  
  # PDF version
  if (reseed) {
    set.seed(seed)
  }
  pdf.target <- file.path(filepath, paste0(filename, ".pdf"))
  ggsave(fig, filename=pdf.target, width=width, height=height, units=units,
         device=cairo_pdf, ...)
  
  # PNG version, starting again from the same seed
  if (reseed) {
    set.seed(seed)
  }
  png.target <- file.path(filepath, paste0(filename, ".png"))
  ggsave(fig, filename=png.target, width=width, height=height, units=units,
         type="cairo", dpi=300, ...)
}

# Clean up the interval labels created by cut()
# Use with forcats::fct_relabel
clean.cut.range <- function(x) {
  # Rewrite interval labels of the form "(a,b]" as "a+1–b", e.g.
  # "(1970,1975]" becomes "1971–1975". Labels that do not start with "(" (and
  # NA labels) are passed through untouched.
  #
  # x: character vector of labels, e.g. the levels produced by cut()
  #
  # Returns a character vector of the same length as x, so the function is
  # safe to hand to fct_relabel() even when only some labels need cleaning.
  x <- as.character(x)
  need.to.clean <- !is.na(x) & startsWith(x, "(")
  
  # Strip every "(" and "]" and split what is left into its two bounds
  stripped <- gsub("]", "", gsub("(", "", x[need.to.clean], fixed=TRUE),
                   fixed=TRUE)
  bounds <- strsplit(stripped, ",", fixed=TRUE)
  lower <- as.numeric(vapply(bounds, function(b) b[1], character(1)))
  upper <- as.numeric(vapply(bounds, function(b) b[2], character(1)))
  
  # The interval is open on the left, so its first whole value is lower + 1.
  # Write the cleaned labels back in place to keep x at its original length.
  x[need.to.clean] <- paste0(lower + 1, "–", upper)
  x
}

# Percent label formatter built with percent_format(); accuracy = 1 rounds
# labels to whole percentages
my_percent <- percent_format(accuracy = 1)


# Load clean data

Figure 1: Cumulative number of GPAs.

# Lookup table of reporting periods: one row per chunk, giving its first year,
# last year, and display name. chunk_name is stored as an ordered factor whose
# levels follow the row order below.
year.chunks <- tribble(
  ~chunk_start, ~chunk_end, ~chunk_name,
  1900,         1969,       "Pre-1970",
  1970,         1974,       "1970–74",
  1975,         1979,       "1975–79",
  1980,         1984,       "1980–84",
  1985,         1989,       "1985–89",
  1990,         1994,       "1990–94",
  1995,         1999,       "1995–99",
  2000,         2004,       "2000–04",
  2005,         2009,       "2005–09",
  2010,         2014,       "2010–14",
  2015,         2020,       "2015+"
) %>%
  mutate(chunk_name = factor(chunk_name, levels=unique(chunk_name),
                             ordered=TRUE))

# One row per (chunk, calendar year): expand each chunk's start and end year
# into the full run of years it covers, so yearly data can be joined to its
# chunk name through the `years` column.
year.chunks.long <- year.chunks %>%
  mutate(years = map2(chunk_start, chunk_end, ~ .x:.y)) %>%
  select(chunk_name, years) %>%
  # Name the list column explicitly; calling unnest() with no column
  # specification is deprecated in tidyr
  unnest(years)

# One row per GPA per year from its start year through its most recent year,
# each flagged active = TRUE. last.active repeats the GPA's recent_year on
# every row. GPAs without a start year are dropped.
years.active <- gpa.data.clean %>% 
  filter(!is.na(start_year)) %>% 
  transmute(gpa_id,
            years = map2(start_year, recent_year, ~ .x:.y),
            last.active = recent_year) %>%
  # Name the list column explicitly; calling unnest() with no column
  # specification is deprecated in tidyr
  unnest(years) %>%
  mutate(active = TRUE)

# GPA-by-year panel with an imputed activity status for the years after each
# GPA's last observed year.
gpas.active.over.time <- years.active %>%
  # Build the full GPA x year grid, then bring the observed rows back in
  expand(gpa_id, years) %>%
  left_join(years.active, by=c("gpa_id", "years")) %>%
  # Within each GPA, carry the last active year forward into later grid rows
  group_by(gpa_id) %>%
  mutate(last.active = na.locf(last.active, na.rm=FALSE)) %>%
  # Get rid of rows with absolutely no data
  filter(!(is.na(last.active) & is.na(active))) %>%
  ungroup() %>%
  # Observed rows keep their own status. Unobserved rows are imputed from
  # last.active: still active if the GPA was last active in 2014 or later,
  # inactive otherwise.
  mutate(imputed.active = case_when(
    !is.na(active) ~ active,
    last.active >= 2014 ~ TRUE,
    last.active < 2014 ~ FALSE
  )) %>%
  # Keep all active years; only keep the first inactive year
  group_by(gpa_id, imputed.active) %>%
  filter(imputed.active | (!imputed.active & row_number() == 1)) %>%
  ungroup()

# Plot data for the cumulative figure: per chunk, the number of GPAs active in
# that chunk and the running total of discontinued GPAs.
gpa.cum.plot <- gpas.active.over.time %>%
  left_join(year.chunks.long, by="years") %>%
  # Count each GPA at most once per chunk and status
  distinct(gpa_id, chunk_name, imputed.active) %>%
  group_by(chunk_name, imputed.active) %>%
  summarise(num = n()) %>%
  # Running total across chunks, computed separately for each status
  group_by(imputed.active) %>%
  mutate(cum_total = cumsum(num)) %>%
  ungroup() %>%
  mutate(
    # Active GPAs are repeated in every chunk they span, so the per-chunk count
    # is already the right value; inactive GPAs appear only once and then drop
    # out of the data, so they need the cumulative total
    plot_value = ifelse(imputed.active, num, cum_total),
    active = factor(imputed.active, levels=c(TRUE, FALSE),
                    labels=c("Active GPIs in period", "Cumulative discontinued GPIs"),
                    ordered=TRUE)
  ) %>% 
  # The final, open-ended chunk is left out of the plot
  filter(chunk_name != "2015+")

# Row counts of GPAs flagged active and not active in the cleaned data
gpas.active <- nrow(filter(gpa.data.clean, active == TRUE))
gpas.defunct <- nrow(filter(gpa.data.clean, active == FALSE))

Source: Authors’ database.

Note: “Active” denotes GPAs that were maintained in the given time period. GPAs that have not been updated since 2014 are marked as “Discontinued” in the year following the last active year.

N = 138 active GPAs in 2015; 21 total discontinued GPAs.

Collapsed subjects

Several of the smaller subject areas are collapsed into larger overarching categories, listed in the table below:

Subject (collapsed) Subject (original)
Education Education
Development Development
Economic Economic
Environment Environment
Environment Energy
Governance Governance
Human Rights Gender
Human Rights Human Rights
Human Rights Press Freedom
Security Security
Social Social
Social Health
Trade & Finance Finance
Trade & Finance Technology
Trade & Finance Trade

Figure 2 (new): Number of GPAs, by issue and creator type.

Source: Authors’ database.

Note: Includes only “active” GPAs as of 2014; excludes defunct cases. Note that the total count of GPAs is larger than in Figure 1 because we have double counted cases that straddle issue areas, such as health and development. Overlapping and unknown creators are omitted.

N = 138.

Figure X: GPA creators, by type (GPAs active as of 2014)

The figure below shows the distribution of creator types among the currently active GPAs in our database: 52% of GPAs are created by NGOs, while only 30% are under the direct control of states or IGOs.

Source: Authors’ database.

N = 138.

Figure 4: Country of GPA source headquarters (maps).

Source: Authors’ data

N = 17 countries.

Figure 5: Pathways of GPA influence.

Source: Adapted from Kelley and Simmons, 2015.

(located at ./Output/figure-5-pathways.pdf and ./Output/figure-5-pathways.png)

Figure X: Pseudo-networks of indicator creators over time

Source: Authors’ data

# Long version of the cleaned data: a GPA whose subject_collapsed lists several
# comma-separated subjects gets one row per subject, with surrounding
# whitespace trimmed from each subject.
gpa.data.clean.creator.long <- mutate(
  separate_rows(gpa.data.clean, subject_collapsed, sep=","),
  subject_collapsed = str_trim(subject_collapsed)
)

# Number of rows per collapsed subject, most frequent subject first
subject.counts <- count(gpa.data.clean.creator.long, subject_collapsed,
                        sort=TRUE)

# GPA-by-pentad table: for every five-year period in which a GPA exists, keep
# one row carrying its subject and creator type. GPAs with no start year or no
# creator type are excluded.
gpa.creator.cumulative <- gpa.data.clean.creator.long %>%
  filter(!is.na(start_year)) %>% 
  filter(!is.na(creator_collapsed)) %>%
  # Infer death year based on most recent year if not active; every other GPA
  # is given an end year of 2015
  mutate(end_year = ifelse(active == 0, recent_year, 2015)) %>%
  # Create list of years, like 1998:2005
  mutate(year = map2(start_year, end_year, ~ seq(.x, .y))) %>%
  # Unnest list of years
  unnest(year) %>%
  # Years up to and including 1970 fall outside the first interval below
  filter(year > 1970) %>%
  # Divide years into five-year chunks; the cut() labels are then rewritten by
  # clean.cut.range (e.g. "(1970,1975]" becomes "1971–1975")
  mutate(pentad = cut(year, breaks=seq(1970, 2015, 5), ordered_result=TRUE),
         pentad = fct_relabel(pentad, clean.cut.range)) %>%
  # Only select the first year in the pentad. Without this, indexes appear up
  # to five times in the pentad.
  # NOTE(review): the input has one row per GPA per subject (separate_rows
  # upstream), so slice(1) within (pentad, gpa_id) also keeps only one subject
  # for a multi-subject GPA -- confirm this is intended.
  filter(!is.na(pentad)) %>%
  group_by(pentad, gpa_id) %>%
  slice(1) %>%
  ungroup() %>%
  # Attach the overall subject counts (column n) and sort by them so that
  # fct_inorder() below orders subject levels by first appearance in this
  # sorted table
  left_join(subject.counts, by="subject_collapsed") %>%
  arrange(pentad, desc(n)) %>%
  mutate(subject_collapsed = ordered(fct_inorder(subject_collapsed)),
         creator_collapsed = ordered(fct_relevel(creator_collapsed,
                                                 "NGO", "IGO", "State",
                                                 "University or Private",
                                                 "Other")))

# Export data
# Write the GPA-by-pentad table to Output/gpa_creator_cumulative.csv under
# PROJHOME, keeping only the identifying columns and sorting by start year
gpa.creator.cumulative %>% 
  select(gpa_id, gpa_name, start_year, recent_year, 
         subject = subject_collapsed, creator = creator_collapsed) %>% 
  arrange(start_year) %>% 
  # Pass the destination positionally: readr renamed write_csv()'s `path`
  # argument to `file`, and the positional form works under either name
  write_csv(file.path(PROJHOME, "Output", "gpa_creator_cumulative.csv"))
 
# gpa.creator.cumulative %>%
#   count(pentad, subject_collapsed) %>%
#   spread(pentad, nn) %>%
#   write_csv("~/Desktop/gpa_subject_pentads.csv", na = "")
# 
# gpa.creator.cumulative %>%
#   count(creator_collapsed, subject_collapsed) %>%
#   spread(creator_collapsed, nn) %>%
#   write_csv("~/Desktop/gpa_subject_creators.csv", na = "")
# 
# gpa.creator.cumulative %>%
#   count(pentad, subject_collapsed, creator_collapsed) %>%
#   spread(pentad, nn) %>%
#   write_csv("~/Desktop/gpa_subject_creator_pentads.csv", na = "")