knitr::opts_chunk$set(fig.retina = 2,
tidy.opts=list(width.cutoff = 100), # For code
options(width = 100)) # For output
library(tidyverse)
library(readxl)
library(haven)
library(stringr)
library(lubridate)
library(countrycode)
library(WDI)
library(rvest)
library(DT)

Collect original data

3P Trafficking Index

Cho et al.’s 3P Trafficking Index uses World Bank codes which don’t always perfectly correspond with ISO3 or COW codes. We ignore some countries and make manual changes for others.

We fix these:

  • BHU (Bhutan): 760 / BTN / BT
  • KSV (Kosovo): 347 / XKK / XK
  • RMI (Marshall Islands): 983 / MHL / MH
  • HKG (Hong Kong): 715
  • SRB (Serbia): 340

We ignore these:

  • BES (Caribbean Netherlands): no cowcode / no ISO
  • ABW (Aruba): no cowcode
  • ANT (Netherlands Antilles): no cowcode
  • CUW (Curacao): no cowcode
  • MAC (Macao): no cowcode

Year range: Consistent data up to 2015

cho.raw <- read_excel("data/Cho 3Ps/3P_Index_2000-2015.xlsx", skip = 2)
countries.to.ignore <- c("ABW", "ANT", "BES", "CUW", "MAC")
wb2iso2 <- c(BES = NA, BHU = "BT", KSV = "XK", RMI = "MH")
wb2iso3 <- c(BES = NA, BHU = "BTN", KSV = "XKK", RMI = "MHL")
wb2cow <- c(BHU = 760L, HKG = 715L, KSV = 347L, RMI = 983L, SRB = 340L)
wb_ignore <- c(ABW = NA, AIA = NA, ANT = NA, ASM = NA, BES = NA, BMU = NA,
COK = NA, CUW = NA, CYM = NA, GRL = NA, GUF = NA, GUM = NA,
JEY = NA, MAC = NA, MTQ = NA, NCL = NA, NIU = NA, PRI = NA,
REU = NA, VIR = NA, WBG = NA)
countryname2cow <- c(`European Union` = NA, Serbia = 340L)
iso22cow <- c(HK = 715L, RS = 340L, XK = 347L)
iso32cow <- c(HKG = 715L, SRB = 340L, SCG = 345L, XKK = 347L, PSE = 669L)
cow2iso2 <- c(`340` = "RS", `347` = "XK", `715` = "HK")
cow2iso3 <- c(`340` = "SRB", `347` = "XKK", `715` = "HKG")
cho.clean <- cho.raw %>%
mutate(iso2 = countrycode(Code, "wb", "iso2c", custom_match = wb2iso2),
iso3 = countrycode(Code, "wb", "iso3c", custom_match = wb2iso3),
cowcode = countrycode(Code, "wb", "cown",
custom_match = c(wb2cow, wb_ignore))) %>%
filter(!(Code %in% countries.to.ignore)) %>%
select(year = Year, iso2, iso3, cowcode,
prosecution = Prosecution, protection = Protection,
prevention = Prevention, p = `Overall 3P`) %>%
mutate_at(vars(year, cowcode, prosecution, protection, prevention, p),
funs(as.integer)) %>%
mutate(prot_prev = protection + prevention)
empty.panel <- cho.clean %>%
expand(cowcode, year)
cho.clean %>% glimpse()
## Observations: 2,868
## Variables: 9
## $ year        <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 201...
## $ iso2        <chr> "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF...
## $ iso3        <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG"...
## $ cowcode     <int> 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, ...
## $ prosecution <int> NA, 1, NA, 2, 3, 3, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2...
## $ protection  <int> NA, 1, NA, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 3, 2...
## $ prevention  <int> NA, 1, NA, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 2, 4, 3...
## $ p           <int> NA, 3, NA, 8, 9, 8, 7, 6, 6, 7, 6, 6, 9, 9, 9, 9, 6, 6, 8, 7, 8, 8, 7, 9, 7...
## $ prot_prev   <int> NA, 2, NA, 6, 6, 5, 5, 4, 4, 5, 4, 4, 5, 5, 5, 5, 4, 4, 6, 5, 6, 6, 5, 7, 5...

Control of corruption

We use control of corruption from the World Bank’s Worldwide Governance Indicators data. The official(?) website only offers WGI data as a horrific Excel file, but the WGI’s pseudo-page at the World Bank provides a link to a CSV(!).

There are more countries in the WGI data than in the 3P data. We ignore these:

  • AIA = Anguilla
  • ASM = American Samoa
  • BMU = Bermuda
  • COK = Cook Islands
  • CYM = Cayman Islands
  • GRL = Greenland
  • GUF = French Guiana
  • GUM = Guam
  • JEY = Jersey, Channel Islands
  • MTQ = Martinique
  • NCL = New Caledonia
  • NIU = Niue
  • PRI = Puerto Rico
  • REU = Réunion
  • VIR = Virgin Islands
  • WBG = West Bank and Gaza

Year range: Consistent data up to 2015

wgi.raw <- read_csv("data/WGI/WGI_Data.csv")
corruption <- wgi.raw %>%
filter(`Indicator Code` == "CC.EST") %>%
mutate(cowcode = countrycode(`Country Code`, "wb", "cown",
custom_match = c(wb2cow, wb_ignore))) %>%
filter(!is.na(cowcode)) %>%
select(cowcode, matches("\\d{4}")) %>%
gather(year, corruption, -cowcode) %>%
mutate(year = as.integer(year), cowcode = as.integer(cowcode)) %>%
filter(year >= 2000)
corruption %>% glimpse()
## Observations: 2,940
## Variables: 3
## $ cowcode    <int> 970, 700, 339, 615, 232, 540, 58, 160, 371, 900, 305, 373, 31, 692, 771, 53,...
## $ year       <int> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000...
## $ corruption <dbl> NA, -1.91376245, -0.82448280, -0.94872248, 1.38485932, -1.51669121, 0.913627...

Democracy

We use democracy data from Polity IV.

Year range: Consistent data up to 2015

polity.raw <- read_excel("data/Polity/p4v2015.xls")
polity <- polity.raw %>%
select(cowcode = ccode, year, polity = polity2) %>%
mutate_at(vars(year, cowcode, polity),
funs(as.integer)) %>%
filter(year >= 2000)
polity %>% glimpse()
## Observations: 2,647
## Variables: 3
## $ cowcode <int> 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700,...
## $ year    <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2...
## $ polity  <int> -7, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, -1, 5, 5, 7, 7, 7, ...

Women legislators

We use data on the percent of women legislators from the World Bank’s Gender Statistics database.

Dozens of countries don’t correspond to COW codes here, mostly becuase the data includes observations for regions like the Arab World (ARB) and Fragile and conflict affected situations (FCS). Rather than explicitly define every code to ignore, we just implicitly ignore unmatched codes.

I’m super curious to know what the original authors did with this data, too. 72% is missing!

Year range: Super inconsistent data up to 2011, only a few observations in 2012–2014

gender.raw <- read_csv("data/WB gender stats/Gender_Stat_Data.csv")
gender <- gender.raw %>%
filter(`Indicator Code` == "SG.GEN.LSOM.ZS") %>%
mutate(cowcode = countrycode(`Country Code`, "wb", "cown",
custom_match = c(wb2cow, wb_ignore))) %>%
filter(!is.na(cowcode)) %>%
select(cowcode, matches("\\d{4}")) %>%
gather(year, female.leg.prop, -cowcode) %>%
mutate(year = as.integer(year), cowcode = as.integer(cowcode)) %>%
filter(year >= 2000)
gender %>% glimpse()
## Observations: 3,315
## Variables: 3
## $ cowcode         <int> 700, 339, 615, 232, 540, 58, 160, 371, 900, 305, 373, 31, 692, 771, 53,...
## $ year            <int> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,...
## $ female.leg.prop <dbl> NA, NA, NA, NA, NA, NA, 27.39945, NA, 32.92588, 30.08345, NA, NA, NA, N...

Women’s economic rights

We use data on women’s economic rights from CIRI.

There are three countries with cowcode == 345 that cover three different time periods:

  • Yugoslavia: 1981-1991
  • Serbia and Montenegro: 1992-1999, 2003-2005
  • Yugoslavia, Federal Republic of: 2000-2002

We collapse them all into one continuous country.

Year range: Consistent data up to 2011

ciri.raw <- read_csv("data/CIRI/CIRI Data 1981_2011 2014.04.14.csv",
na = c("-999", "-77", "-66"))
wecon <- ciri.raw %>%
mutate(COW = case_when(
.$CTRY == "Kosovo" ~ 347L,
.$CTRY == "Serbia" ~ 340L,
.$CTRY == "Montenegro" ~ 341L,
TRUE ~ .$COW
)) %>%
filter(CTRY != "Soviet Union") %>%
filter(!(CTRY == "Yugoslavia" & !(YEAR %in% 1981:1991)),
!(CTRY == "Yugoslavia, Federal Republic of" & !(YEAR %in% 2000:2002)),
!(CTRY == "Serbia and Montenegro" & !(YEAR %in% c(1992:1999, 2003:2005)))) %>%
select(cowcode = COW, year = YEAR, wecon = WECON) %>%
filter(year >= 2000)
wecon %>% glimpse()
## Observations: 2,382
## Variables: 3
## $ cowcode <int> 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 339, 339, 339, 339,...
## $ year    <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2000, 2...
## $ wecon   <int> 0, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1,...

International regime membership

We use data from from the UN’s treaty database on Palermo Protocol ratifications.

Year range: Consistent data up to 2015

treaty.url <- "https://web.archive.org/web/20160403063433/https://treaties.un.org/Pages/ViewDetails.aspx?src=TREATY&mtdsg_no=XVIII-12-a&chapter=18&lang=en"
treaty.file <- file.path("data/Palermo protocol/ratifications.html")
if (!file.exists(treaty.file)) {
download.file(treaty.url, "data/Palermo protocol/ratifications.html")
}
ratifications.raw <- read_html(treaty.file)
ratifications <- ratifications.raw %>%
html_nodes(xpath='//*[@id="ctl00_ContentPlaceHolder1_tblgrid"]') %>%
html_table(header=TRUE) %>% bind_rows() %>%
magrittr::set_colnames(c("participant", "signature", "ratification")) %>%
mutate(cowcode = countrycode(participant, "country.name", "cown",
custom_match = countryname2cow)) %>%
mutate_at(vars(signature, ratification),
funs(str_extract(str_replace(., "\\t", " "),
"\\d{1,2}\\s+\\w{3}\\s+\\d{4}"))) %>%
mutate_at(vars(signature, ratification),
funs(date = dmy(.))) %>%
mutate_at(vars(signature_date, ratification_date),
funs(year = year(.))) %>%
filter(!is.na(cowcode))
ratifications.panel <- ratifications %>%
select(cowcode, contains("_year")) %>%
right_join(empty.panel, by = "cowcode") %>%
mutate_at(vars(contains("_year")),
funs(bin = year >= .)) %>%
mutate_at(vars(contains("bin")),
funs(ifelse(is.na(.), FALSE, .))) %>%
mutate(year = as.integer(year), cowcode = as.integer(cowcode)) %>%
select(cowcode, year, palermo.signed = signature_date_year_bin,
palermo.ratified = ratification_date_year_bin)
ratifications.panel %>% glimpse()
## Observations: 2,944
## Variables: 4
## $ cowcode          <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20...
## $ year             <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011...
## $ palermo.signed   <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE...
## $ palermo.ratified <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,...

GDP per capita

We use GDP per capita data (constant 2010 USD) from the World Bank.

Year range: Consistent data up to 2015

wdi.indicators <- c("NY.GDP.PCAP.KD", # GDP per capita (constant 2010 USD)
"NY.GDP.MKTP.CD", # GDP (current dollars)
"NY.GDP.MKTP.KD", # GDP (constant 2010 USD)
"SP.POP.TOTL") # Population, total
wdi.path <- file.path("data/WDI/wdi.rds")
if (!file.exists(wdi.path)) {
# Get all countries and regions because the World Bank chokes on ISO codes like
# XK for Kosovo, even though it returns data for Kosovo with the XK code
# ¯\_(ツ)_/¯
wdi.raw <- WDI(country="all", wdi.indicators,
extra=FALSE, start=2000, end=2015)
saveRDS(wdi.raw, wdi.path)
} else {
wdi.raw <- readRDS(wdi.path)
}
# Filter countries here instead
wdi <- wdi.raw %>%
filter(iso2c %in% unique(cho.clean$iso2)) %>%
arrange(iso2c, year) %>%
mutate(cowcode = countrycode(iso2c, "iso2c", "cown",
custom_match = iso22cow),
year = as.integer(year)) %>%
select(cowcode, year,
gdp.capita.2010 = NY.GDP.PCAP.KD, gdp.2010 = NY.GDP.MKTP.KD,
gdp.current = NY.GDP.MKTP.CD, population = SP.POP.TOTL)
wdi %>% glimpse()
## Observations: 2,928
## Variables: 6
## $ cowcode         <int> 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 6...
## $ year            <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,...
## $ gdp.capita.2010 <dbl> 64133.1472, 61640.5966, 59862.7918, 60968.2784, 60917.8695, 56663.7454,...
## $ gdp.2010        <dbl> 195614307991, 198351118487, 203177907295, 221058661958, 242206098797, 2...
## $ gdp.current     <dbl> 104337372362, 103311640572, 109816201498, 124346358067, 147824370320, 1...
## $ population      <dbl> 3050128, 3217865, 3394060, 3625798, 3975945, 4481976, 5171255, 6010100,...

US aid

USAID provides the complete dataset for its Foreign Aid Explorer as a giant CSV file. The data includes both economic and military aid, but it’s easy to filter out the military aid. Here we only look at obligations, not disbursements, so that the data is comparable to the OECD data from AidData. The data we downloaded provides constant amounts in 2015 dollars; we rescale that to 2010 to match all other variables.

USAID’s conversion to constant 2015 dollars doesn’t seem to take country differences into account—the deflator for each country in 2010 is essentially 91.79. When there are differences, it’s because of floating point issue (like, if there are tiny grants of $3, there aren’t enough decimal points to get the fraction to 91.79). So we just take the median value of the deflator for all countries and all grants and use that.

\[ \begin{aligned} \text{Deflator} &= \frac{\text{current aid}}{\text{constant aid} \times 100} \\ \text{Constant aid in year}_{target} &= \text{Current aid in year}_t \times \frac{\text{deflator in year}_{target}}{\text{deflator in year}_t} \end{aligned} \]

We include only obligated economic aid.

Year range: Consistent data up to 2015

usaid.url <- "https://explorer.usaid.gov/prepared/us_foreign_aid_complete.csv"
usaid.path <- file.path("data", "USAID")
usaid.name <- basename(usaid.url)
# Download USAID data if needed
if (!file.exists(file.path(usaid.path, usaid.name))) {
httr::GET(usaid.url,
httr::write_disk(file.path(usaid.path, usaid.name)),
httr::progress())
}
usaid.raw <- read_csv(file.path(usaid.path, usaid.name),
na = c("", "NA", "NULL"))
usaid.clean <- usaid.raw %>%
filter(assistance_category_name == "Economic") %>%
filter(transaction_type_name == "Obligations") %>%
mutate(country_code = recode(country_code, `CS-KM` = "XKK")) %>%
# Remove regions and World
filter(!str_detect(country_name, "Region"),
!(country_name %in% c("World")),
numeric_year >= 2000, numeric_year < 2016) %>%
mutate(cowcode = countrycode(country_code, "iso3c", "cown",
custom_match = iso32cow)) %>%
# Get rid of tiny states
filter(!is.na(cowcode)) %>%
select(cowcode, year = numeric_year,
oda.us.current = current_amount, oda.us.2015 = constant_amount) %>%
filter(oda.us.current != 0, oda.us.2015 != 0) %>%
mutate(aid.deflator = oda.us.current / oda.us.2015 * 100,
year = as.integer(year))
# Convert 2015 dollars to 2010
usaid.deflator.2010 <- usaid.clean %>%
filter(year == 2010) %>%
summarise(deflator.target.year = median(aid.deflator, na.rm=TRUE)) %>%
as.numeric()
usaid.rescaled <- usaid.clean %>%
mutate(oda.us.2010 = oda.us.current * (usaid.deflator.2010 / aid.deflator))
usaid <- usaid.rescaled %>%
group_by(cowcode, year) %>%
summarise(us.aid = sum(oda.us.2010))
usaid %>% glimpse()
## Observations: 2,753
## Variables: 3
## $ cowcode <int> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 31, 31, 31, 31, 31, 31, 31,...
## $ year    <int> 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2...
## $ us.aid  <dbl> 27679881, 24182690, 23193684, 24352412, 24832111, 27376273, 26477992, 25749988,...

Collect new data

Criminalization

Data on criminalization comes from Judith Kelley’s Scorecard Diplomacy. The published data is complete until 2011, but in the book this data was forward-filled to 2014. Here we forward-fill it to 2015 to match other variables.

Year range: Consistent data up to 2011, but forward-filled to 2014/2015

crim <- read_stata("data/Scorecard Diplomacy/criminalization_jk_2015-08-10.dta") %>%
select(cowcode = ccode, year, crim.level = adjcrimlevel) %>%
mutate(crim.level = ifelse(is.nan(crim.level), 0, crim.level))
crim.panel <- crim %>%
# Add new row for 2015 in one country so that expand() adds it for all countries
rbind(data_frame(cowcode = 2, year = 2015, crim.level = NA)) %>%
expand(cowcode, year) %>%
left_join(crim, by = c("cowcode", "year")) %>%
group_by(cowcode) %>%
# Forward fill
fill(crim.level) %>%
ungroup() %>%
mutate(crim.level.factor = factor(crim.level, levels = 0:2,
labels = c("No criminalization",
"Partial criminalization",
"Full criminalization"),
ordered = TRUE)) %>%
mutate_at(vars(cowcode, year, crim.level),
funs(as.integer))
## Warning: Column `cowcode` has different attributes on LHS and RHS of join
## Warning: Column `year` has different attributes on LHS and RHS of join
crim.panel %>% glimpse()
## Observations: 8,820
## Variables: 4
## $ cowcode           <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ year              <int> 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 197...
## $ crim.level        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ crim.level.factor <ord> No criminalization, No criminalization, No criminalization, No crimin...

Combine all data

Original Cho data

df.cho <- empty.panel %>%
left_join(cho.clean, by = c("cowcode", "year")) %>%
left_join(corruption, by = c("cowcode", "year")) %>%
left_join(polity, by = c("cowcode", "year")) %>%
left_join(gender, by = c("cowcode", "year")) %>%
left_join(wecon, by = c("cowcode", "year")) %>%
left_join(ratifications.panel, by = c("cowcode", "year")) %>%
left_join(wdi, by = c("cowcode", "year")) %>%
left_join(usaid, by = c("cowcode", "year")) %>%
# Adjust some variables now that this is a panel
rename(gdp = gdp.2010, gdp.capita = gdp.capita.2010) %>%
mutate(iso2 = countrycode(cowcode, "cown", "iso2c",
custom_match = cow2iso2),
iso3 = countrycode(cowcode, "cown", "iso3c",
custom_match = cow2iso3)) %>%
mutate(us.aid = ifelse(is.na(us.aid), 0, us.aid),
us.aid.pct.gdp = us.aid / gdp,
gdp.capita_log = log1p(gdp.capita))
testthat::expect_equal(nrow(df.cho), nrow(empty.panel))
if (nrow(df.cho) == nrow(empty.panel)) {
saveRDS(df.cho, "data/df_cho.rds")
}
df.cho %>% datatable(extensions = "Responsive") %>%
formatRound(select_if(df.cho, is.double) %>% colnames())
df.cho.full <- df.cho %>%
group_by(cowcode) %>%
mutate_at(vars(p, prot_prev, prosecution, protection, prevention),
funs(lag = lag(.),
lag2 = lag(., 2))) %>%
ungroup() %>%
mutate(female.leg.prop = ifelse(is.na(female.leg.prop), 0, female.leg.prop))
if (nrow(df.cho.full) == nrow(empty.panel)) {
saveRDS(df.cho.full, "data/df_cho_full.rds")
}

New data

df.crim <- df.cho.full %>%
left_join(crim.panel, by = c("cowcode", "year")) %>%
group_by(cowcode) %>%
mutate_at(vars(starts_with("crim")),
funs(lag = lag(.),
lag2 = lag(., 2))) %>%
ungroup()
if (nrow(df.crim) == nrow(empty.panel)) {
saveRDS(df.crim, "data/df_crim.rds")
}
df.crim %>% datatable(extensions = "Responsive") %>%
formatRound(select_if(df.crim, is.double) %>% colnames())