knitr::opts_chunk$set(fig.retina = 2,
tidy.opts=list(width.cutoff = 100), # For code
options(width = 100)) # For output
library(tidyverse)
library(readxl)
library(haven)
library(stringr)
library(lubridate)
library(countrycode)
library(WDI)
library(rvest)
library(DT)
Cho et al.’s 3P Trafficking Index uses World Bank codes which don’t always perfectly correspond with ISO3 or COW codes. We ignore some countries and make manual changes for others.
We fix these:
We ignore these:
Year range: Consistent data up to 2015
cho.raw <- read_excel("data/Cho 3Ps/3P_Index_2000-2015.xlsx", skip = 2)
countries.to.ignore <- c("ABW", "ANT", "BES", "CUW", "MAC")
wb2iso2 <- c(BES = NA, BHU = "BT", KSV = "XK", RMI = "MH")
wb2iso3 <- c(BES = NA, BHU = "BTN", KSV = "XKK", RMI = "MHL")
wb2cow <- c(BHU = 760L, HKG = 715L, KSV = 347L, RMI = 983L, SRB = 340L)
wb_ignore <- c(ABW = NA, AIA = NA, ANT = NA, ASM = NA, BES = NA, BMU = NA,
COK = NA, CUW = NA, CYM = NA, GRL = NA, GUF = NA, GUM = NA,
JEY = NA, MAC = NA, MTQ = NA, NCL = NA, NIU = NA, PRI = NA,
REU = NA, VIR = NA, WBG = NA)
countryname2cow <- c(`European Union` = NA, Serbia = 340L)
iso22cow <- c(HK = 715L, RS = 340L, XK = 347L)
iso32cow <- c(HKG = 715L, SRB = 340L, SCG = 345L, XKK = 347L, PSE = 669L)
cow2iso2 <- c(`340` = "RS", `347` = "XK", `715` = "HK")
cow2iso3 <- c(`340` = "SRB", `347` = "XKK", `715` = "HKG")
cho.clean <- cho.raw %>%
mutate(iso2 = countrycode(Code, "wb", "iso2c", custom_match = wb2iso2),
iso3 = countrycode(Code, "wb", "iso3c", custom_match = wb2iso3),
cowcode = countrycode(Code, "wb", "cown",
custom_match = c(wb2cow, wb_ignore))) %>%
filter(!(Code %in% countries.to.ignore)) %>%
select(year = Year, iso2, iso3, cowcode,
prosecution = Prosecution, protection = Protection,
prevention = Prevention, p = `Overall 3P`) %>%
mutate_at(vars(year, cowcode, prosecution, protection, prevention, p),
funs(as.integer)) %>%
mutate(prot_prev = protection + prevention)
empty.panel <- cho.clean %>%
expand(cowcode, year)
cho.clean %>% glimpse()
## Observations: 2,868
## Variables: 9
## $ year <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 201...
## $ iso2 <chr> "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF...
## $ iso3 <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG"...
## $ cowcode <int> 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, ...
## $ prosecution <int> NA, 1, NA, 2, 3, 3, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2...
## $ protection <int> NA, 1, NA, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 3, 2...
## $ prevention <int> NA, 1, NA, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 2, 4, 3...
## $ p <int> NA, 3, NA, 8, 9, 8, 7, 6, 6, 7, 6, 6, 9, 9, 9, 9, 6, 6, 8, 7, 8, 8, 7, 9, 7...
## $ prot_prev <int> NA, 2, NA, 6, 6, 5, 5, 4, 4, 5, 4, 4, 5, 5, 5, 5, 4, 4, 6, 5, 6, 6, 5, 7, 5...
We use control of corruption from the World Bank’s Worldwide Governance Indicators data. The official(?) website only offers WGI data as a horrific Excel file, but the WGI’s pseudo-page at the World Bank provides a link to a CSV(!).
There are more countries in the WGI data than in the 3P data. We ignore these:
Year range: Consistent data up to 2015
wgi.raw <- read_csv("data/WGI/WGI_Data.csv")
corruption <- wgi.raw %>%
filter(`Indicator Code` == "CC.EST") %>%
mutate(cowcode = countrycode(`Country Code`, "wb", "cown",
custom_match = c(wb2cow, wb_ignore))) %>%
filter(!is.na(cowcode)) %>%
select(cowcode, matches("\\d{4}")) %>%
gather(year, corruption, -cowcode) %>%
mutate(year = as.integer(year), cowcode = as.integer(cowcode)) %>%
filter(year >= 2000)
corruption %>% glimpse()
## Observations: 2,940
## Variables: 3
## $ cowcode <int> 970, 700, 339, 615, 232, 540, 58, 160, 371, 900, 305, 373, 31, 692, 771, 53,...
## $ year <int> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000...
## $ corruption <dbl> NA, -1.91376245, -0.82448280, -0.94872248, 1.38485932, -1.51669121, 0.913627...
We use democracy data from Polity IV.
Year range: Consistent data up to 2015
polity.raw <- read_excel("data/Polity/p4v2015.xls")
polity <- polity.raw %>%
select(cowcode = ccode, year, polity = polity2) %>%
mutate_at(vars(year, cowcode, polity),
funs(as.integer)) %>%
filter(year >= 2000)
polity %>% glimpse()
## Observations: 2,647
## Variables: 3
## $ cowcode <int> 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700,...
## $ year <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2...
## $ polity <int> -7, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, -1, 5, 5, 7, 7, 7, ...
We use data on the percent of women legislators from the World Bank’s Gender Statistics database.
Dozens of countries don’t correspond to COW codes here, mostly becuase the data includes observations for regions like the Arab World (ARB) and Fragile and conflict affected situations (FCS). Rather than explicitly define every code to ignore, we just implicitly ignore unmatched codes.
I’m super curious to know what the original authors did with this data, too. 72% is missing!
Year range: Super inconsistent data up to 2011, only a few observations in 2012–2014
gender.raw <- read_csv("data/WB gender stats/Gender_Stat_Data.csv")
gender <- gender.raw %>%
filter(`Indicator Code` == "SG.GEN.LSOM.ZS") %>%
mutate(cowcode = countrycode(`Country Code`, "wb", "cown",
custom_match = c(wb2cow, wb_ignore))) %>%
filter(!is.na(cowcode)) %>%
select(cowcode, matches("\\d{4}")) %>%
gather(year, female.leg.prop, -cowcode) %>%
mutate(year = as.integer(year), cowcode = as.integer(cowcode)) %>%
filter(year >= 2000)
gender %>% glimpse()
## Observations: 3,315
## Variables: 3
## $ cowcode <int> 700, 339, 615, 232, 540, 58, 160, 371, 900, 305, 373, 31, 692, 771, 53,...
## $ year <int> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,...
## $ female.leg.prop <dbl> NA, NA, NA, NA, NA, NA, 27.39945, NA, 32.92588, 30.08345, NA, NA, NA, N...
We use data on women’s economic rights from CIRI.
There are three countries with cowcode == 345
that cover three different time periods:
We collapse them all into one continuous country.
Year range: Consistent data up to 2011
ciri.raw <- read_csv("data/CIRI/CIRI Data 1981_2011 2014.04.14.csv",
na = c("-999", "-77", "-66"))
wecon <- ciri.raw %>%
mutate(COW = case_when(
.$CTRY == "Kosovo" ~ 347L,
.$CTRY == "Serbia" ~ 340L,
.$CTRY == "Montenegro" ~ 341L,
TRUE ~ .$COW
)) %>%
filter(CTRY != "Soviet Union") %>%
filter(!(CTRY == "Yugoslavia" & !(YEAR %in% 1981:1991)),
!(CTRY == "Yugoslavia, Federal Republic of" & !(YEAR %in% 2000:2002)),
!(CTRY == "Serbia and Montenegro" & !(YEAR %in% c(1992:1999, 2003:2005)))) %>%
select(cowcode = COW, year = YEAR, wecon = WECON) %>%
filter(year >= 2000)
wecon %>% glimpse()
## Observations: 2,382
## Variables: 3
## $ cowcode <int> 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 700, 339, 339, 339, 339,...
## $ year <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2000, 2...
## $ wecon <int> 0, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1,...
We use data from from the UN’s treaty database on Palermo Protocol ratifications.
Year range: Consistent data up to 2015
treaty.url <- "https://web.archive.org/web/20160403063433/https://treaties.un.org/Pages/ViewDetails.aspx?src=TREATY&mtdsg_no=XVIII-12-a&chapter=18&lang=en"
treaty.file <- file.path("data/Palermo protocol/ratifications.html")
if (!file.exists(treaty.file)) {
download.file(treaty.url, "data/Palermo protocol/ratifications.html")
}
ratifications.raw <- read_html(treaty.file)
ratifications <- ratifications.raw %>%
html_nodes(xpath='//*[@id="ctl00_ContentPlaceHolder1_tblgrid"]') %>%
html_table(header=TRUE) %>% bind_rows() %>%
magrittr::set_colnames(c("participant", "signature", "ratification")) %>%
mutate(cowcode = countrycode(participant, "country.name", "cown",
custom_match = countryname2cow)) %>%
mutate_at(vars(signature, ratification),
funs(str_extract(str_replace(., "\\t", " "),
"\\d{1,2}\\s+\\w{3}\\s+\\d{4}"))) %>%
mutate_at(vars(signature, ratification),
funs(date = dmy(.))) %>%
mutate_at(vars(signature_date, ratification_date),
funs(year = year(.))) %>%
filter(!is.na(cowcode))
ratifications.panel <- ratifications %>%
select(cowcode, contains("_year")) %>%
right_join(empty.panel, by = "cowcode") %>%
mutate_at(vars(contains("_year")),
funs(bin = year >= .)) %>%
mutate_at(vars(contains("bin")),
funs(ifelse(is.na(.), FALSE, .))) %>%
mutate(year = as.integer(year), cowcode = as.integer(cowcode)) %>%
select(cowcode, year, palermo.signed = signature_date_year_bin,
palermo.ratified = ratification_date_year_bin)
ratifications.panel %>% glimpse()
## Observations: 2,944
## Variables: 4
## $ cowcode <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20...
## $ year <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011...
## $ palermo.signed <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE...
## $ palermo.ratified <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,...
We use GDP per capita data (constant 2010 USD) from the World Bank.
Year range: Consistent data up to 2015
wdi.indicators <- c("NY.GDP.PCAP.KD", # GDP per capita (constant 2010 USD)
"NY.GDP.MKTP.CD", # GDP (current dollars)
"NY.GDP.MKTP.KD", # GDP (constant 2010 USD)
"SP.POP.TOTL") # Population, total
wdi.path <- file.path("data/WDI/wdi.rds")
if (!file.exists(wdi.path)) {
# Get all countries and regions because the World Bank chokes on ISO codes like
# XK for Kosovo, even though it returns data for Kosovo with the XK code
# ¯\_(ツ)_/¯
wdi.raw <- WDI(country="all", wdi.indicators,
extra=FALSE, start=2000, end=2015)
saveRDS(wdi.raw, wdi.path)
} else {
wdi.raw <- readRDS(wdi.path)
}
# Filter countries here instead
wdi <- wdi.raw %>%
filter(iso2c %in% unique(cho.clean$iso2)) %>%
arrange(iso2c, year) %>%
mutate(cowcode = countrycode(iso2c, "iso2c", "cown",
custom_match = iso22cow),
year = as.integer(year)) %>%
select(cowcode, year,
gdp.capita.2010 = NY.GDP.PCAP.KD, gdp.2010 = NY.GDP.MKTP.KD,
gdp.current = NY.GDP.MKTP.CD, population = SP.POP.TOTL)
wdi %>% glimpse()
## Observations: 2,928
## Variables: 6
## $ cowcode <int> 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 696, 6...
## $ year <int> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,...
## $ gdp.capita.2010 <dbl> 64133.1472, 61640.5966, 59862.7918, 60968.2784, 60917.8695, 56663.7454,...
## $ gdp.2010 <dbl> 195614307991, 198351118487, 203177907295, 221058661958, 242206098797, 2...
## $ gdp.current <dbl> 104337372362, 103311640572, 109816201498, 124346358067, 147824370320, 1...
## $ population <dbl> 3050128, 3217865, 3394060, 3625798, 3975945, 4481976, 5171255, 6010100,...
USAID provides the complete dataset for its Foreign Aid Explorer as a giant CSV file. The data includes both economic and military aid, but it’s easy to filter out the military aid. Here we only look at obligations, not disbursements, so that the data is comparable to the OECD data from AidData. The data we downloaded provides constant amounts in 2015 dollars; we rescale that to 2010 to match all other variables.
USAID’s conversion to constant 2015 dollars doesn’t seem to take country differences into account—the deflator for each country in 2010 is essentially 91.79. When there are differences, it’s because of floating point issue (like, if there are tiny grants of $3, there aren’t enough decimal points to get the fraction to 91.79). So we just take the median value of the deflator for all countries and all grants and use that.
\[ \begin{aligned} \text{Deflator} &= \frac{\text{current aid}}{\text{constant aid} \times 100} \\ \text{Constant aid in year}_{target} &= \text{Current aid in year}_t \times \frac{\text{deflator in year}_{target}}{\text{deflator in year}_t} \end{aligned} \]
We include only obligated economic aid.
Year range: Consistent data up to 2015
usaid.url <- "https://explorer.usaid.gov/prepared/us_foreign_aid_complete.csv"
usaid.path <- file.path("data", "USAID")
usaid.name <- basename(usaid.url)
# Download USAID data if needed
if (!file.exists(file.path(usaid.path, usaid.name))) {
httr::GET(usaid.url,
httr::write_disk(file.path(usaid.path, usaid.name)),
httr::progress())
}
usaid.raw <- read_csv(file.path(usaid.path, usaid.name),
na = c("", "NA", "NULL"))
usaid.clean <- usaid.raw %>%
filter(assistance_category_name == "Economic") %>%
filter(transaction_type_name == "Obligations") %>%
mutate(country_code = recode(country_code, `CS-KM` = "XKK")) %>%
# Remove regions and World
filter(!str_detect(country_name, "Region"),
!(country_name %in% c("World")),
numeric_year >= 2000, numeric_year < 2016) %>%
mutate(cowcode = countrycode(country_code, "iso3c", "cown",
custom_match = iso32cow)) %>%
# Get rid of tiny states
filter(!is.na(cowcode)) %>%
select(cowcode, year = numeric_year,
oda.us.current = current_amount, oda.us.2015 = constant_amount) %>%
filter(oda.us.current != 0, oda.us.2015 != 0) %>%
mutate(aid.deflator = oda.us.current / oda.us.2015 * 100,
year = as.integer(year))
# Convert 2015 dollars to 2010
usaid.deflator.2010 <- usaid.clean %>%
filter(year == 2010) %>%
summarise(deflator.target.year = median(aid.deflator, na.rm=TRUE)) %>%
as.numeric()
usaid.rescaled <- usaid.clean %>%
mutate(oda.us.2010 = oda.us.current * (usaid.deflator.2010 / aid.deflator))
usaid <- usaid.rescaled %>%
group_by(cowcode, year) %>%
summarise(us.aid = sum(oda.us.2010))
usaid %>% glimpse()
## Observations: 2,753
## Variables: 3
## $ cowcode <int> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 31, 31, 31, 31, 31, 31, 31,...
## $ year <int> 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2...
## $ us.aid <dbl> 27679881, 24182690, 23193684, 24352412, 24832111, 27376273, 26477992, 25749988,...
Data on criminalization comes from Judith Kelley’s Scorecard Diplomacy. The published data is complete until 2011, but in the book this data was forward-filled to 2014. Here we forward-fill it to 2015 to match other variables.
Year range: Consistent data up to 2011, but forward-filled to 2014/2015
crim <- read_stata("data/Scorecard Diplomacy/criminalization_jk_2015-08-10.dta") %>%
select(cowcode = ccode, year, crim.level = adjcrimlevel) %>%
mutate(crim.level = ifelse(is.nan(crim.level), 0, crim.level))
crim.panel <- crim %>%
# Add new row for 2015 in one country so that expand() adds it for all countries
rbind(data_frame(cowcode = 2, year = 2015, crim.level = NA)) %>%
expand(cowcode, year) %>%
left_join(crim, by = c("cowcode", "year")) %>%
group_by(cowcode) %>%
# Forward fill
fill(crim.level) %>%
ungroup() %>%
mutate(crim.level.factor = factor(crim.level, levels = 0:2,
labels = c("No criminalization",
"Partial criminalization",
"Full criminalization"),
ordered = TRUE)) %>%
mutate_at(vars(cowcode, year, crim.level),
funs(as.integer))
## Warning: Column `cowcode` has different attributes on LHS and RHS of join
## Warning: Column `year` has different attributes on LHS and RHS of join
crim.panel %>% glimpse()
## Observations: 8,820
## Variables: 4
## $ cowcode <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ year <int> 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 197...
## $ crim.level <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ crim.level.factor <ord> No criminalization, No criminalization, No criminalization, No crimin...
df.cho <- empty.panel %>%
left_join(cho.clean, by = c("cowcode", "year")) %>%
left_join(corruption, by = c("cowcode", "year")) %>%
left_join(polity, by = c("cowcode", "year")) %>%
left_join(gender, by = c("cowcode", "year")) %>%
left_join(wecon, by = c("cowcode", "year")) %>%
left_join(ratifications.panel, by = c("cowcode", "year")) %>%
left_join(wdi, by = c("cowcode", "year")) %>%
left_join(usaid, by = c("cowcode", "year")) %>%
# Adjust some variables now that this is a panel
rename(gdp = gdp.2010, gdp.capita = gdp.capita.2010) %>%
mutate(iso2 = countrycode(cowcode, "cown", "iso2c",
custom_match = cow2iso2),
iso3 = countrycode(cowcode, "cown", "iso3c",
custom_match = cow2iso3)) %>%
mutate(us.aid = ifelse(is.na(us.aid), 0, us.aid),
us.aid.pct.gdp = us.aid / gdp,
gdp.capita_log = log1p(gdp.capita))
testthat::expect_equal(nrow(df.cho), nrow(empty.panel))
if (nrow(df.cho) == nrow(empty.panel)) {
saveRDS(df.cho, "data/df_cho.rds")
}
df.cho %>% datatable(extensions = "Responsive") %>%
formatRound(select_if(df.cho, is.double) %>% colnames())
df.cho.full <- df.cho %>%
group_by(cowcode) %>%
mutate_at(vars(p, prot_prev, prosecution, protection, prevention),
funs(lag = lag(.),
lag2 = lag(., 2))) %>%
ungroup() %>%
mutate(female.leg.prop = ifelse(is.na(female.leg.prop), 0, female.leg.prop))
if (nrow(df.cho.full) == nrow(empty.panel)) {
saveRDS(df.cho.full, "data/df_cho_full.rds")
}
df.crim <- df.cho.full %>%
left_join(crim.panel, by = c("cowcode", "year")) %>%
group_by(cowcode) %>%
mutate_at(vars(starts_with("crim")),
funs(lag = lag(.),
lag2 = lag(., 2))) %>%
ungroup()
if (nrow(df.crim) == nrow(empty.panel)) {
saveRDS(df.crim, "data/df_crim.rds")
}
df.crim %>% datatable(extensions = "Responsive") %>%
formatRound(select_if(df.crim, is.double) %>% colnames())