Munge data

# Load libraries
library(magrittr)
library(readr)
library(dplyr)
library(tidyr)
library(readxl)
library(stringr)
library(ggplot2)
library(gridExtra)
library(scales)
library(Cairo)
library(grid)
library(vcd)
library(countrycode)
library(maptools)
library(rgdal)

# Load data and add labels
responses.orgs <- readRDS(file.path(PROJHOME, "data_raw", 
                                    "responses_orgs_clean.rds"))
responses.countries <- readRDS(file.path(PROJHOME, "data_raw", 
                                         "responses_countries_clean.rds"))

responses.orgs.labs <- read_csv(file.path(PROJHOME, "data_raw", 
                                          "response_orgs_labels.csv"))
responses.countries.labs <- read_csv(file.path(PROJHOME, "data_raw", 
                                               "response_countries_labels.csv"))

Hmisc::label(responses.orgs, self=FALSE) <- responses.orgs.labs$varlabel
Hmisc::label(responses.countries, self=FALSE) <- responses.countries.labs$varlabel

# Add survey sources
phone <- readRDS(file.path(PROJHOME, "data_raw", "phone.rds"))
linkedin <- readRDS(file.path(PROJHOME, "data_raw", "linkedin.rds"))

responses.orgs %<>% 
  mutate(survey.method = ifelse(survey.id %in% phone, "Phone", 
                                ifelse(survey.id %in% linkedin, "LinkedIn", 
                                       "Online")))

# Add a nicer short ID to each response to reference in the article
set.seed(1234)
nicer.ids <- data_frame(survey.id = responses.orgs$survey.id) %>%
  mutate(clean.id = 1000 + sample(1:n()))
write_csv(nicer.ids, path=file.path(PROJHOME, "data", "id_lookup_WILL_BE_OVERWRITTEN.csv"))

The data is split into two sets: responses.orgs has a row for each surveyed organization and responses.countries has a row for each country organizations responded for (1-4 countries per organization). For ease of analysis, this combines them into one larger dataframe (so organization-level data is repeated). It also removes columns that were added manually, where an RA coded whether a country was mentioned in different questions (with a colum for each country!).

responses.all <- responses.orgs %>% 
  left_join(responses.countries, by="survey.id") %>%
  select(-contains("_c", ignore.case=FALSE)) %>%  # Get rid of all the dummy vars
  left_join(nicer.ids, by="survey.id") %>%
  select(survey.id, clean.id, everything())

Convert some responses into numeric indexes:

importance <- data_frame(Q3.19 = levels(responses.countries$Q3.19),
                         importance = c(2, 1, 0, NA))

positivity <- data_frame(Q3.25 = levels(responses.countries$Q3.25),
                         positivity = c(-1, 1, 0, NA))

improvement <- data_frame(Q3.26 = levels(responses.countries$Q3.26),
                          improvement = c(1, 0, -1, NA))

# Cho data
# TODO: Someday get this directly from the internet, like Freedom House data
#   * http://www.economics-human-trafficking.org/data-and-reports.html
# Except that data doesn't include tier scores, so that would need to come 
# from somewhere else...
tip.change <- read_csv(file.path(PROJHOME, "data", "policy_index.csv")) %>%
  group_by(countryname) %>%
  summarise(avg_tier = mean(tier, na.rm=TRUE),
            improve_tip = (last(na.omit(tier), default=NA) - 
                             first(na.omit(tier), default=NA)),
            change_policy = last(na.omit(p), default=NA) - 
              first(na.omit(p), default=NA)) %>%
  mutate(countryname = countrycode(countryname, "country.name", "country.name"))

# Democracy (Freedom House)
if (!file.exists(file.path(PROJHOME, "data_external", "freedom_house.xls"))) {
  fh.url <- paste0("https://freedomhouse.org/sites/default/files/", 
                   "Individual%20Country%20Ratings%20and%20Status%2C%20",
                   "1973-2015%20%28FINAL%29.xls")
  fh.tmp <- file.path(PROJHOME, "data_external", "freedom_house.xls")
  download.file(fh.url, fh.tmp)
}

fh.raw <- read_excel(file.path(PROJHOME, "data_external", "freedom_house.xls"), 
                     skip=6)
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10 
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10 
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10 
## DEFINEDNAME: 20 00 00 01 1a 00 00 00 01 00 00 00 00 00 00 07 29 17 00 3b 00 00 00 00 ff ff 00 00 00 00 3b 00 00 00 00 06 00 00 00 ff 00 10
# Calculate the number of years covered in the data (each year has three columns)
num.years <- (ncol(fh.raw) - 1)/3

# Create combinations of all the variables and years
var.years <- expand.grid(var = c('PR', 'CL', 'Status'), 
                         year = 1972:(1972 + num.years - 1))

colnames(fh.raw) <- c('country', paste(var.years$var, var.years$year, sep="_"))

# Split columns and convert to long
fh <- fh.raw %>%
  gather(var.year, value, -country) %>%
  separate(var.year, into=c("indicator", "year"), sep="_") %>%
  filter(!is.na(country)) %>%
  spread(indicator, value) %>%
  mutate(year = as.numeric(year),
         CL = suppressWarnings(as.integer(CL)),
         PR = suppressWarnings(as.integer(PR)),
         Status = factor(Status, levels=c("NF", "PF", "F"), 
                         labels=c("Not free", "Partially free", "Free"),
                         ordered=TRUE),
         total.freedom = CL + PR,
         country.clean = countrycode(country, "country.name", "country.name")) %>%
  filter(!is.na(CL) & !is.na(PR)) %>%
  # All the cases we're interested in are after 2000, so we can remove these
  # problematic double countries
  filter(!(country %in% c("Germany, E.", "Germany, W.", "USSR", "Vietnam, N.", 
                          "Vietnam, S.", "Yemen, N.", "Yemen, S."))) %>%
  # Again, because we only care about post-2000 Serbia, merge with Yugoslavia
  mutate(country.clean = ifelse(country.clean == "Yugoslavia", 
                                "Serbia", country.clean)) %>%
  select(-country, country=country.clean)

fh.summary <- fh %>%
  filter(year >= 2000) %>%
  group_by(country) %>%
  summarize(total.freedom = mean(total.freedom, na.rm=TRUE))

# Funding
funding.raw <- read_csv(file.path(PROJHOME, "data_raw", "funding_clean.csv")) %>%
  mutate(cowcode = ifelse(country == "Serbia", 555, cowcode),
         countryname = countrycode(cowcode, "cown", "country.name"),
         countryname = ifelse(cowcode == 555, "Serbia", countryname)) %>%
  filter(!is.na(countryname)) 

funding.all <- funding.raw %>%
  group_by(countryname) %>%
  summarise(total.funding = sum(amount, na.rm=TRUE),
            avg.funding = mean(amount, na.rm=TRUE)) 

funding.ngos <- funding.raw %>%
  filter(recipient_type %in% c("NGO", "NPO")) %>%
  group_by(countryname) %>%
  summarise(total.funding.ngos = sum(amount, na.rm=TRUE),
            avg.funding.ngos = mean(amount, na.rm=TRUE)) 

responses.full <- responses.all %>%
  filter(work.only.us != "Yes") %>%
  mutate(work.country.clean = countrycode(work.country, 
                                          "country.name", "country.name"),
         work.country.clean = ifelse(is.na(work.country), 
                                     "Global", work.country.clean),
         work.country = work.country.clean) %>%
  left_join(tip.change, by=c("work.country" = "countryname")) %>%
  left_join(funding.all, by=c("work.country" = "countryname")) %>%
  left_join(funding.ngos, by=c("work.country" = "countryname")) %>%
  left_join(fh.summary, by=c("work.country" = "country")) %>%
  left_join(positivity, by = "Q3.25") %>%
  left_join(importance, by = "Q3.19") %>%
  left_join(improvement, by = "Q3.26") %>%
  mutate(received.funding = ifelse(Q3.18_3 != 1 | is.na(Q3.18_3), FALSE, TRUE),
         us.involvement = ifelse(Q3.18_5 != 1 | is.na(Q3.18_5), TRUE, FALSE),
         us.hq = ifelse(home.country == "United States", TRUE, FALSE),
         Q3.19 = factor(Q3.19, levels=c("Most important actor", 
                                        "Somewhat important actor", 
                                        "Not an important actor", 
                                        "Don't know"),
                        ordered=TRUE),
         Q3.25_collapsed = ifelse(Q3.25 == "Negative", NA, Q3.25)) %>%
  mutate(home.region = countrycode(home.country, "country.name", "continent"),
         home.region = ifelse(home.country == "Kosovo", "Europe", home.region),
         home.region = ifelse(home.country == "TWN", "Asia", home.region),
         home.region = ifelse(home.region == "Oceania", "Asia", home.region),
         home.region = ifelse(home.region == "Asia", "Asia and Oceania", home.region),
         work.region = countrycode(work.country, "country.name", "continent"),
         work.region = ifelse(work.country == "Kosovo", "Europe", work.region),
         work.region = ifelse(work.country == "TWN", "Asia", work.region),
         work.region = ifelse(work.region == "Oceania", "Asia", work.region),
         work.region = ifelse(work.region == "Asia", "Asia and Oceania", work.region)) %>%
  mutate(home.iso3 = countrycode(home.country, "country.name", "iso3c"),
         home.iso3 = ifelse(home.country == "Kosovo", "KOS", home.iso3)) %>%
  mutate(work.iso3 = countrycode(work.country, "country.name", "iso3c"),
         work.iso3 = ifelse(work.country == "Kosovo", "KOS", work.iso3))

country.indexes <- responses.full %>%
  filter(!is.na(work.country)) %>%
  group_by(work.country) %>%
  # Needs mutate + mutate_each + select + unique because you can't mix 
  # summarise + summarise_each. See http://stackoverflow.com/a/31815540/120898
  mutate(num.responses = n()) %>%
  mutate_each(funs(score = mean(., na.rm=TRUE), stdev = sd(., na.rm=TRUE), 
                   n = sum(!is.na(.))),
              c(positivity, importance, improvement)) %>%
  select(work.country, num.responses, matches("positivity_|importance_|improvement_")) %>%
  unique %>%
  ungroup() %>%
  arrange(desc(num.responses))

# Load map data
if (!file.exists(file.path(PROJHOME, "data_external", "map_data", 
                           "ne_110m_admin_0_countries.VERSION.txt"))) {
  map.url <- paste0("http://www.naturalearthdata.com/", 
                    "http//www.naturalearthdata.com/download/110m/cultural/", 
                    "ne_110m_admin_0_countries.zip")
  map.tmp <- file.path(PROJHOME, "data_external", basename(map.url))
  download.file(map.url, map.tmp)
  unzip(map.tmp, exdir=file.path(PROJHOME, "data_external", "map_data"))
  unlink(map.tmp)
}

countries.map <- readOGR(file.path(PROJHOME, "data_external", "map_data"), 
                         "ne_110m_admin_0_countries")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/andrew/Research/••Projects/Human trafficking/Anti-TIP NGOs and the US/data_external/map_data", layer: "ne_110m_admin_0_countries"
## with 177 features
## It has 63 fields
countries.robinson <- spTransform(countries.map, CRS("+proj=robin"))
countries.ggmap <- fortify(countries.robinson, region="iso_a3") %>%
  filter(!(id %in% c("ATA", -99))) %>%  # Get rid of Antarctica and NAs
  mutate(id = ifelse(id == "GRL", "DNK", id))  # Greenland is part of Denmark

# All possible countries (to fix the South Sudan issue)
possible.countries <- data_frame(id = unique(as.character(countries.ggmap$id)))


# Save data
# TODO: Make responses.full more anonymous before making it public
# TODO: Save as CSV and Stata too instead of just R
saveRDS(responses.full, file.path(PROJHOME, "data", "responses_full.rds"))
saveRDS(country.indexes, file.path(PROJHOME, "data", "country_indexes.rds"))

# CSV for book plots
df.book.plots <- responses.full %>% 
  select(clean.id, starts_with("Q3.18"), 
         starts_with("Q3.21"), -contains("TEXT"))
write_csv(df.book.plots, 
          file.path(PROJHOME, "data", "data_q3_18_21.csv"))


# Useful functions
theme_clean <- function(base_size=9, base_family="Source Sans Pro Light") {
  ret <- theme_bw(base_size, base_family) + 
    theme(panel.background = element_rect(fill="#ffffff", colour=NA),
          axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
          title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
          panel.border = element_blank(), axis.line=element_blank(),
          panel.grid.minor=element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.major.x = element_line(size=0.25, colour="grey90"),
          axis.ticks=element_blank(),
          legend.position="bottom", 
          axis.title=element_text(size=rel(0.8), family="Source Sans Pro Semibold"),
          strip.text=element_text(size=rel(0.9), family="Source Sans Pro Semibold"),
          strip.background=element_rect(fill="#ffffff", colour=NA),
          panel.margin=unit(1, "lines"), legend.key.size=unit(.7, "line"),
          legend.key=element_blank())
  
  ret
}

# For maps
theme_blank_map <- function(base_size=12, base_family="Source Sans Pro Light") {
  ret <- theme_bw(base_size, base_family) + 
    theme(panel.background = element_rect(fill="#ffffff", colour=NA),
          title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
          panel.border=element_blank(), axis.line=element_blank(),
          panel.grid=element_blank(), axis.ticks=element_blank(),
          axis.title=element_blank(), axis.text=element_blank(),
          legend.text=element_text(size=rel(0.7), family="Source Sans Pro Light"),
          legend.title=element_text(size=rel(0.7), family="Source Sans Pro Semibold"),
          strip.text=element_text(size=rel(1), family="Source Sans Pro Semibold"))
  ret
}

# Return a data frame of counts and proportions for multiple responses
separate.answers.summary <- function(df, cols, labels, total=FALSE) {
  cols.to.select <- which(colnames(df) %in% cols)
  
  denominator <- df %>%
    select(cols.to.select) %>%
    mutate(num.answered = rowSums(., na.rm=TRUE)) %>%
    filter(num.answered > 0) %>%
    nrow()
  
  df <- df %>%
    select(survey.id, cols.to.select) %>%
    gather(question, value, -survey.id) %>%
    mutate(question = factor(question, labels=labels, ordered=TRUE)) %>%
    group_by(question) %>%
    summarize(response = sum(value, na.rm=TRUE), 
              pct = round(response / denominator * 100, 2),
              plot.pct = response / denominator)
  
  colnames(df) <- c("Answer", "Responses", "%", "plot.pct")
  
  if (total) {
    df <- df %>% select(1:3)
    df <- rbind(as.matrix(df), c("Total responses", denominator, "—"))
  }
  
  return(list(df=df, denominator=denominator))
}

# Create a character vector of significance stars
add.stars <- function(x) {
  as.character(symnum(x, corr = FALSE,
                      cutpoints = c(0,  .001,.01,.05, .1, 1),
                      symbols = c("***","**","*","."," ")))
}

General information from the survey

Response rate

Calculating the response rate is a little tricky because of all the different ways we sent out invitations for and conducted the survey (LinkedIn, e-mail, phone).

Denominator calculations

entries.in.list <- 1421  # Organizations in the master NGO list
no.details <- 98  # Organizations that we never tried to make any contact with ever
bounced.invitations <- 132  # Contact information was dead; never saw the survey
not.ngos <- 69  # Organizations that weren't NGOs
duplicates <- 19  # Duplicate entries

orgs.saw.survey <- entries.in.list - no.details - bounced.invitations
viable.entries <- orgs.saw.survey - not.ngos - duplicates

We assume that there were 1191 organizations that saw the link to the survey at least three times. Of those, 69 entries were clearly not NGOs or do not work on human trafficking issues (some were government offices, others only do fundraising, etc.), and most of these organizations responded to the e-mail explaining their situation. 19 entires were duplicates of other entries (generally one entry used the foreign name and one used the English translation).

Given all this, the denominator for our survey’s response rate is 1103.

Numerator calculations

(num.total.responses <- nrow(responses.full %>% group_by(survey.id) %>% slice(1)))
## [1] 480

Final response rate

num.total.responses / viable.entries
## [1] 0.4351768

Other survey metadata

How many times did respondents loop through the country questions?

(num.country.orgs <- nrow(responses.full))
## [1] 561
responses.full %>%
  group_by(survey.id) %>%
  summarise(loops = n()) %>%
  do(data.frame(table(.$loops, dnn="Countries"))) %>%
  mutate(prop = Freq / sum(Freq))
##   Countries Freq       prop
## 1         1  415 0.86458333
## 2         2   52 0.10833333
## 3         3   10 0.02083333
## 4         4    3 0.00625000

How did respondents take the survey?

responses.full %>%
  group_by(survey.id) %>%
  slice(1) %>%  # Select each organization's first country
  group_by(survey.method) %>%
  summarise(num = n()) %>%
  mutate(prop = num / sum(num))
## Source: local data frame [3 x 3]
## 
##   survey.method   num       prop
##           (chr) (int)      (dbl)
## 1      LinkedIn     3 0.00625000
## 2        Online   463 0.96458333
## 3         Phone    14 0.02916667

How many respondents answered at least one free response question?

responses.full %>%
  select(Q3.10:Q3.17, Q3.24.Text, Q3.30, Q4.1) %T>%
  {cat("Number of free response questions:", ncol(.), "\n")} %>%
  rowwise() %>% do(wrote.something = !all(is.na(.))) %>%
  ungroup() %>% mutate(wrote.something = unlist(wrote.something)) %>%
  bind_cols(select(responses.full, survey.id)) %>%
  group_by(survey.id) %>%
  summarise(wrote.something = ifelse(sum(wrote.something) > 0, TRUE, FALSE)) %T>%
  {cat("Number responses:", nrow(.), "\n")} %>%
  do(as.data.frame(table(.$wrote.something, dnn="Wrote something"))) %>%
  mutate(Percent = Freq / sum(Freq))
## Number of free response questions: 12 
## Number responses: 480
##   Wrote.something Freq Percent
## 1           FALSE   72    0.15
## 2            TRUE  408    0.85

Export free response questions for manual analysis

responses.full %>% 
  select(survey.id, clean.id, home.country, work.country, 
         Q3.10:Q3.13, Q3.14:Q3.17, Q3.24.Text, Q3.30, Q4.1) %>%
  write_csv(path=file.path(PROJHOME, "data", "free_responses.csv"))

How many organizations did not list the US as an anti-TIP actor but later indicated US support?

no.mention.us <- responses.countries %>% 
  select(survey.id, Q3.6_c2) %>% filter(Q3.6_c2 == 0)# filter(!is.na(Q3.6_c2))

inndicated.us.activity <- responses.full %>%
  select(survey.id, Q3.8) %>% filter(Q3.8 == "Yes")

sum(no.mention.us$survey.id %in% inndicated.us.activity$survey.id)
## [1] 33

How many organizations completed the survey after it turned to US-related questions? That’s hard to tell because the US questions were in the loop, and Qualtrics does not keep completion statistics for questions potenitally hidden by display logic.

final.qualtrics.count <- 511

survey.duration.completed <- responses.full %>%
  select(survey.id, start.time, end.time) %>%
  group_by(survey.id) %>% slice(1) %>% ungroup() %>%
  mutate(time.spent = end.time - start.time,
         mins.spent = time.spent / 60)

median(as.numeric(survey.duration.completed$mins.spent))
## [1] 21.5
survey.duration <- read_csv(file.path(PROJHOME, "data_raw", "response_times.csv")) %>%
  mutate(pct = num / sum(num),
         cum.num = cumsum(num),
         validish.num = sum(num) - cum.num) %T>%
  {print(head(.))}
## Source: local data frame [6 x 5]
## 
##   minutes_spent   num        pct cum.num validish.num
##           (int) (int)      (dbl)   (int)        (int)
## 1             1   249 0.22656961     249          850
## 2             2    83 0.07552320     332          767
## 3             3    41 0.03730664     373          726
## 4             4    19 0.01728844     392          707
## 5             5    20 0.01819836     412          687
## 6             6    27 0.02456779     439          660
longer.than.five <- filter(survey.duration, minutes_spent==6)$validish.num

We attempted to estimate the post-US completion rate in two ways. First, we counted the number of respondents that spent more than 5 minutes taking the survey (assuming that is sufficient time to make it to the US-focused questions), yielding 660 respondents, which is 149 more than the final number of organizations (511). (Sidenote: 511 is bigger than the final official count of 480 because we filtered out US-only responses completed prior to adding internal validation logic and we removed duplicate or invalid responses prior to analysis) However, because the survey filtered out organizations that only work in the US, most respondents were let out of the survey early. Assuming those who took more than 5 minutes completed the survey, we had a completion rate of 77.4%.

pct.completed <- read_csv(file.path(PROJHOME, "data_raw", "pct_completed.csv")) %>%
  mutate(pct = num / sum(num))

more.than.20 <- sum(filter(pct.completed, pct_complete >= 0.2)$num)

Second, we looked at overall completion rates, which again are not entirely accurate because of Qualtrics’ internal question looping logic—i.e., organizations that completed the full survey are only internally marked as completing 20-30% of it. 604 organizations completed at least 20% of the survey. Assuming at least 20% represents approximate survey completion, we had a completion rate of 84.6%.

Thus, given that we filtered out all US-only NGOs in our response rate calculations, and given that we cannot precisely know the true number of survey dropouts, we upwardly bias our completion rate estimate to 90ish%.

Sector overview

Location

Where are these NGOs based?

hq.countries <- responses.full %>%
  group_by(survey.id) %>% slice(1) %>% ungroup() %>%
  rename(id = home.iso3) %>%
  group_by(id) %>%
  summarize(num.ngos = n()) %>%
  ungroup() %>%
  right_join(possible.countries, by="id") %>%
  mutate(num.ceiling = ifelse(num.ngos >= 10, 10, num.ngos),
         prop = num.ngos / sum(num.ngos, na.rm=TRUE)) %>%
  arrange(desc(num.ngos)) %T>%
  {print(head(.))}
## Source: local data frame [6 x 4]
## 
##      id num.ngos num.ceiling       prop
##   (chr)    (int)       (dbl)      (dbl)
## 1   USA       56          10 0.11940299
## 2   IND       27          10 0.05756930
## 3   NGA       25          10 0.05330490
## 4   GBR       21          10 0.04477612
## 5   CAN       14          10 0.02985075
## 6   THA       10          10 0.02132196
hq.regions <- responses.full %>%
  group_by(survey.id) %>% slice(1) %>% ungroup() %>%
  filter(!is.na(home.region)) %>%
  group_by(home.region) %>%
  summarise(num = n()) %>% ungroup() %>%
  mutate(prop = num / sum(num)) %T>%
  {print(head(.))}
## Source: local data frame [4 x 3]
## 
##        home.region   num      prop
##              (chr) (int)     (dbl)
## 1           Africa    80 0.1670146
## 2         Americas   110 0.2296451
## 3 Asia and Oceania   148 0.3089770
## 4           Europe   141 0.2943633
hq.map <- ggplot(hq.countries, aes(fill=num.ceiling, map_id=id)) +
  geom_map(map=countries.ggmap, size=0.15, colour="black") + 
  expand_limits(x=countries.ggmap$long, y=countries.ggmap$lat) + 
  coord_equal() +
  scale_fill_gradient(low="grey95", high="grey20", breaks=seq(2, 10, 2), 
                      labels=c(paste(seq(2, 8, 2), "  "), "10+"),
                      na.value="#FFFFFF", name="NGOs based in country",
                      guide=guide_colourbar(ticks=FALSE, barwidth=6)) + 
  theme_blank_map() +
  theme(legend.position="bottom", legend.key.size=unit(0.65, "lines"),
        strip.background=element_rect(colour="#FFFFFF", fill="#FFFFFF"))

Where do these NGOs work?

work.countries <- responses.full %>%
  rename(id = work.iso3) %>%
  group_by(id) %>%
  summarize(num.ngos = n()) %>%
  ungroup() %>%
  right_join(possible.countries, by="id") %>%
  mutate(num.ceiling = ifelse(num.ngos >= 10, 10, num.ngos),
         prop = num.ngos / sum(num.ngos, na.rm=TRUE)) %>%
  arrange(desc(num.ngos)) %T>%
  {print(head(.))}
## Source: local data frame [6 x 4]
## 
##      id num.ngos num.ceiling       prop
##   (chr)    (int)       (dbl)      (dbl)
## 1   IND       39          10 0.07129799
## 2   NGA       33          10 0.06032907
## 3   GBR       21          10 0.03839122
## 4   NPL       18          10 0.03290676
## 5   PHL       15          10 0.02742230
## 6   THA       15          10 0.02742230
work.regions <- responses.full %>%
  filter(!is.na(work.region)) %>%
  group_by(work.region) %>%
  summarise(num = n()) %>% ungroup() %>%
  mutate(prop = num / sum(num)) %T>%
  {print(head(.))}
## Source: local data frame [4 x 3]
## 
##        work.region   num      prop
##              (chr) (int)     (dbl)
## 1           Africa   115 0.2060932
## 2         Americas    72 0.1290323
## 3 Asia and Oceania   220 0.3942652
## 4           Europe   151 0.2706093
work.map <- ggplot(work.countries, aes(fill=num.ceiling, map_id=id)) +
  geom_map(map=countries.ggmap, size=0.15, colour="black") + 
  expand_limits(x=countries.ggmap$long, y=countries.ggmap$lat) + 
  coord_equal() +
  scale_fill_gradient(low="grey95", high="grey20", breaks=seq(2, 10, 2), 
                      labels=c(paste(seq(2, 8, 2), "  "), "10+"),
                      na.value="#FFFFFF", name="NGOs working in country",
                      guide=guide_colourbar(ticks=FALSE, barwidth=6)) + 
  theme_blank_map() +
  theme(legend.position="bottom", legend.key.size=unit(0.65, "lines"),
        strip.background=element_rect(colour="#FFFFFF", fill="#FFFFFF"))

Combined maps

fig.maps <- arrangeGrob(hq.map, work.map, nrow=1)
grid.draw(fig.maps)

ggsave(fig.maps, filename=file.path(PROJHOME, "figures", "fig_maps.pdf"),
       width=6, height=3, units="in", device=cairo_pdf, scale=1.5)
ggsave(fig.maps, filename=file.path(PROJHOME, "figures", "fig_maps.png"),
       width=6, height=3, units="in", type="cairo", scale=1.5)

Side-by-side graph of home vs. work regions

plot.hq <- hq.regions %>%
  arrange(num) %>%
  mutate(region = factor(home.region, levels=home.region, ordered=TRUE),
         prop.nice = sprintf("%.1f%%", prop * 100))

plot.work <- work.regions %>%
  mutate(region = factor(work.region, levels=levels(plot.hq$region), ordered=TRUE),
         prop.nice = sprintf("%.1f%%", prop * 100))

fig.hq <- ggplot(plot.hq, aes(x=region, y=num)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = prop.nice), size=2.5, hjust=1.3, 
            family="Source Sans Pro Light") + 
  labs(x=NULL, y="NGOs based in region") + 
  scale_y_continuous(trans="reverse", expand = c(.1, .1)) + 
  coord_flip(ylim=c(0, 200)) + 
  theme_clean() + 
  theme(axis.text.y = element_blank(), 
        axis.line.y = element_blank(),
        plot.margin = unit(c(1,0.5,1,1), "lines"))

fig.work <- ggplot(plot.work, aes(x=region, y=num)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = prop.nice), size=2.5, hjust=-0.3, 
            family="Source Sans Pro Light") + 
  labs(x=NULL, y="NGOs working in region") + 
  scale_y_continuous(expand = c(.15, .15)) + 
  coord_flip() + 
  theme_clean() + 
  theme(axis.text.y = element_text(hjust=0.5), 
        axis.line.y = element_blank(),
        plot.margin = unit(c(1,1,1,0), "lines"))

fig.locations <- arrangeGrob(fig.hq, fig.work, nrow=1)
grid.draw(fig.locations)

ggsave(fig.locations, filename=file.path(PROJHOME, "figures", "fig_locations.pdf"),
       width=5, height=1.5, units="in", device=cairo_pdf, scale=2.5)
ggsave(fig.locations, filename=file.path(PROJHOME, "figures", "fig_locations.png"),
       width=5, height=1.5, units="in", type="cairo", scale=2.5)

Where do different regional NGOs work?

responses.full %>%
  filter(!is.na(work.region)) %>%
  group_by(home.region, work.region) %>%
  summarise(num = n()) %>%
  group_by(home.region) %>%
  mutate(prop.home.region = num / sum(num)) %>%
  print
## Source: local data frame [14 x 4]
## Groups: home.region [4]
## 
##         home.region      work.region   num prop.home.region
##               (chr)            (chr) (int)            (dbl)
## 1            Africa           Africa    88       0.96703297
## 2            Africa Asia and Oceania     2       0.02197802
## 3            Africa           Europe     1       0.01098901
## 4          Americas           Africa    16       0.11594203
## 5          Americas         Americas    70       0.50724638
## 6          Americas Asia and Oceania    43       0.31159420
## 7          Americas           Europe     9       0.06521739
## 8  Asia and Oceania           Africa     2       0.01234568
## 9  Asia and Oceania Asia and Oceania   158       0.97530864
## 10 Asia and Oceania           Europe     2       0.01234568
## 11           Europe           Africa     9       0.05389222
## 12           Europe         Americas     2       0.01197605
## 13           Europe Asia and Oceania    17       0.10179641
## 14           Europe           Europe   139       0.83233533

Amount and type of TIP work

orgs.only <- responses.full %>%
  group_by(survey.id) %>% slice(1) %>% ungroup()

How much time and resources do these NGOs spend on trafficking?

fig.time <- ggplot(data=orgs.only,
                   aes(x=as.numeric(Q2.1)/100, y=(..count.. / sum(..count..)))) + 
  geom_histogram(binwidth=0.1) + 
  labs(x="Proportion of time spent on trafficking", y="Proportion of responses") + 
  scale_x_continuous(labels=percent, limits=c(0, 1), breaks=seq(0, 1, 0.2)) +
  scale_y_continuous(labels=percent, breaks=seq(0, 0.12, 0.02)) + 
  coord_cartesian(ylim=c(0, 0.125)) + 
  theme_clean() + theme(panel.grid.major.y = element_line(size=0.25, colour="grey90"))
fig.time

ggsave(fig.time, filename=file.path(PROJHOME, "figures", "fig_time.pdf"),
       width=5, height=2, units="in", device=cairo_pdf)
ggsave(fig.time, filename=file.path(PROJHOME, "figures", "fig_time.png"),
       width=5, height=2, units="in", type="cairo")

Summary stats of time spent

orgs.only %>%
  summarise(avg.time = mean(Q2.1, na.rm=TRUE),
            med.time = median(Q2.1, na.rm=TRUE),
            min.time = min(Q2.1, na.rm=TRUE),
            max.time = max(Q2.1, na.rm=TRUE))
## Source: local data frame [1 x 4]
## 
##   avg.time med.time min.time max.time
##      (dbl)    (dbl)    (int)    (int)
## 1 56.96476       56        1      100

How much do organizations know about trafficking in the countries they work in?

responses.full %>%
  group_by(Q3.3) %>%
  summarise(num = n()) %>%
  filter(!is.na(Q3.3)) %>%
  mutate(prop = num / sum(num)) %T>%
  {print(sum(.$num))}
## [1] 555
## Source: local data frame [6 x 3]
## 
##          Q3.3   num        prop
##        (fctr) (int)       (dbl)
## 1        None     4 0.007207207
## 2 Very little    17 0.030630631
## 3      Little    15 0.027027027
## 4        Some   103 0.185585586
## 5       A lot   412 0.742342342
## 6  Don't know     4 0.007207207

What trafficking issues is the organization involved with?

cols <- c("Q2.2_1", "Q2.2_2", "Q2.2_3", "Q2.2_4")
labels <- c("Organ trafficking", "Sex trafficking",
            "Labor trafficking", "Other")

issues <- separate.answers.summary(orgs.only, cols, labels)
issues$denominator  # Number of responses
## [1] 479
issues$df
## Source: local data frame [4 x 4]
## 
##              Answer Responses     %   plot.pct
##              (fctr)     (int) (dbl)      (dbl)
## 1 Organ trafficking        30  6.26 0.06263048
## 2   Sex trafficking       408 85.18 0.85177453
## 3 Labor trafficking       294 61.38 0.61377871
## 4             Other       116 24.22 0.24217119
plot.data <- issues$df %>% 
  mutate(Answer=factor(Answer, levels=rev(labels), ordered=TRUE))

fig.issues <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
  geom_bar(aes(y=plot.pct), stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.issues

How many NGOs deal with both sex and labor trafficking?

orgs.only %>% 
  mutate(sex = ifelse(is.na(Q2.2_2), 0, 1),
         labor = ifelse(is.na(Q2.2_3), 0, 1),
         both = sex == 1 & labor == 1) %>% 
  summarise(sex = sum(sex), labor = sum(labor), 
            num.both = sum(both), prop.both = num.both / issues$denominator)
## Source: local data frame [1 x 4]
## 
##     sex labor num.both prop.both
##   (dbl) (dbl)    (int)     (dbl)
## 1   408   294      243 0.5073069

Other responses about issues NGOs deal with

# orgs.only %>% filter(!is.na(Q2.2_4_TEXT)) %>% 
#   select(clean.id, Q2.2_4_TEXT) %>% View

Which kinds of victims do NGOs help?

cols <- c("Q2.3_1", "Q2.3_2", "Q2.3_3")
labels <- c("Children", "Adults", "Other")

victims <- separate.answers.summary(orgs.only, cols, labels)
victims$denominator  # Number of responses
## [1] 478
victims$df
## Source: local data frame [3 x 4]
## 
##     Answer Responses     %  plot.pct
##     (fctr)     (int) (dbl)     (dbl)
## 1 Children       335 70.08 0.7008368
## 2   Adults       318 66.53 0.6652720
## 3    Other        75 15.69 0.1569038
plot.data <- victims$df %>% 
  mutate(Answer=factor(Answer, levels=rev(labels), ordered=TRUE))

fig.victims <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
  geom_bar(aes(y=plot.pct), stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.victims

Other responses about victims NGOs deal with

# orgs.only %>% filter(!is.na(Q2.3_3_TEXT)) %>% 
#   select(clean.id, Q2.3_3_TEXT) %>% View

How are the types of victims distributed between the main trafficking issues?

victims.issues <- orgs.only %>% 
  select(organs=Q2.2_1, sex=Q2.2_2, labor=Q2.2_3, other.issue=Q2.2_4, 
         children=Q2.3_1, adults=Q2.3_2, other.victims=Q2.3_3) %>%
  mutate_each(funs(!is.na(.)))

victims.issues %>%
  filter(children) %>%
  summarise(num = n(),
            sex = sum(sex), sex.prop = sex / n(),
            labor = sum(labor), labor.prop = labor / n())
## Source: local data frame [1 x 5]
## 
##     num   sex  sex.prop labor labor.prop
##   (int) (int)     (dbl) (int)      (dbl)
## 1   335   291 0.8686567   211  0.6298507
victims.issues %>%
  filter(adults) %>%
  summarise(num = n(),
            sex = sum(sex), sex.prop = sex / n(),
            labor = sum(labor), labor.prop = labor / n())
## Source: local data frame [1 x 5]
## 
##     num   sex  sex.prop labor labor.prop
##   (int) (int)     (dbl) (int)      (dbl)
## 1   318   284 0.8930818   212  0.6666667

Which efforts do NGOs focus on?

cols <- c("Q2.4_1", "Q2.4_2", "Q2.4_3", "Q2.4_4", "Q2.4_5")
labels <- c("Prevention and education", "Prosecutions and legal issues",
            "Victim protection", "Victim assistance", "Other")

efforts <- separate.answers.summary(orgs.only, cols, labels)
efforts$denominator  # Number of responses
## [1] 479
efforts$df
## Source: local data frame [5 x 4]
## 
##                          Answer Responses     %  plot.pct
##                          (fctr)     (int) (dbl)     (dbl)
## 1      Prevention and education       398 83.09 0.8308977
## 2 Prosecutions and legal issues       188 39.25 0.3924843
## 3             Victim protection       248 51.77 0.5177453
## 4             Victim assistance       340 70.98 0.7098121
## 5                         Other       128 26.72 0.2672234
plot.data <- efforts$df %>% 
  arrange(plot.pct) %>%
  mutate(Answer=factor(Answer, levels=Answer, ordered=TRUE))

fig.efforts <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
  geom_bar(aes(y=plot.pct), stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.efforts

Other responses about efforts NGOs engage in

# orgs.only %>% filter(!is.na(Q2.4_5_TEXT)) %>%
#   select(clean.id, Q2.4_5_TEXT) %>% View

How are the types of efforts distributed between the main trafficking issues?

efforts.issues <- orgs.only %>% 
  select(prevention=Q2.4_1, legal=Q2.4_2, protection=Q2.4_3, assistance=Q2.4_4, 
         sex=Q2.2_2, labor=Q2.2_3) %>%
  mutate_each(funs(!is.na(.)))

efforts.issues %>%
  filter(prevention) %>%
  summarise(num = n(),
            sex = sum(sex), sex.prop = sex / n(),
            labor = sum(labor), labor.prop = labor / n())
## Source: local data frame [1 x 5]
## 
##     num   sex  sex.prop labor labor.prop
##   (int) (int)     (dbl) (int)      (dbl)
## 1   398   338 0.8492462   259  0.6507538
efforts.issues %>%
  filter(legal) %>%
  summarise(num = n(),
            sex = sum(sex), sex.prop = sex / n(),
            labor = sum(labor), labor.prop = labor / n())
## Source: local data frame [1 x 5]
## 
##     num   sex sex.prop labor labor.prop
##   (int) (int)    (dbl) (int)      (dbl)
## 1   188   168 0.893617   125  0.6648936
efforts.issues %>%
  filter(protection) %>%
  summarise(num = n(),
            sex = sum(sex), sex.prop = sex / n(),
            labor = sum(labor), labor.prop = labor / n())
## Source: local data frame [1 x 5]
## 
##     num   sex  sex.prop labor labor.prop
##   (int) (int)     (dbl) (int)      (dbl)
## 1   248   219 0.8830645   172  0.6935484
efforts.issues %>%
  filter(assistance) %>%
  summarise(num = n(),
            sex = sum(sex), sex.prop = sex / n(),
            labor = sum(labor), labor.prop = labor / n())
## Source: local data frame [1 x 5]
## 
##     num   sex  sex.prop labor labor.prop
##   (int) (int)     (dbl) (int)      (dbl)
## 1   340   297 0.8735294   218  0.6411765

Work with other actors

Which institutions have been active in anti-TIP work?

cols <- c("Q3.5_1", "Q3.5_2", "Q3.5_3", "Q3.5_4", "Q3.5_5")
labels <- c("The national government", "NGOs and civil society",
            "Foreign governments", "International organizations", "Other")

other.actors <- separate.answers.summary(responses.full, cols, labels)
other.actors$denominator  # Number of responses
## [1] 553
other.actors$df
## Source: local data frame [5 x 4]
## 
##                        Answer Responses     %  plot.pct
##                        (fctr)     (int) (dbl)     (dbl)
## 1     The national government       375 67.81 0.6781193
## 2      NGOs and civil society       526 95.12 0.9511754
## 3         Foreign governments       232 41.95 0.4195298
## 4 International organizations       370 66.91 0.6690778
## 5                       Other        81 14.65 0.1464738
plot.data <- other.actors$df %>% 
  arrange(plot.pct) %>%
  mutate(Answer=factor(Answer, levels=Answer, ordered=TRUE))

fig.other.actors <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
  geom_bar(aes(y=plot.pct), stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.other.actors

Other responses

others <- responses.full %>% select(survey.id, starts_with("Q3.5_5_")) %>%
  filter(!is.na(Q3.5_5_TEXT)) %>%
  # Convert values to numeric
  mutate_each(funs(as.numeric(levels(.))[.]), -c(survey.id, Q3.5_5_TEXT))

cols <- c(c("Q3.5_5_LawEnforcement", "Q3.5_5_Education", 
            "Q3.5_5_DomesticGovernment", "Q3.5_5_ReligiousGroups", 
            "Q3.5_5_Embassies", "Q3.5_5_InternationalGroups", 
            "Q3.5_5_NGOs", "Q3.5_5_Other", "Q3.5_5_Media", 
            "Q3.5_5_ExperienceofSurviviors", 
            "Q3.5_5_PrivateSector", "Q3.5_5_Unions"))

labels <- c("Law Enforcement", "Education", 
            "Domestic Government", "Religious Groups", 
            "Embassies", "International Groups", 
            "NGOs", "Other", "Media", 
            "Experience of Surviviors", 
            "Private Sector", "Unions")

(others.summary <- separate.answers.summary(others, cols, labels))
## $df
## Source: local data frame [12 x 4]
## 
##                      Answer Responses     %   plot.pct
##                      (fctr)     (dbl) (dbl)      (dbl)
## 1           Law Enforcement         8 11.27 0.11267606
## 2                 Education         4  5.63 0.05633803
## 3       Domestic Government        25 35.21 0.35211268
## 4          Religious Groups         9 12.68 0.12676056
## 5                 Embassies         2  2.82 0.02816901
## 6      International Groups        10 14.08 0.14084507
## 7                      NGOs        10 14.08 0.14084507
## 8                     Other         9 12.68 0.12676056
## 9                     Media         2  2.82 0.02816901
## 10 Experience of Surviviors         3  4.23 0.04225352
## 11           Private Sector         2  2.82 0.02816901
## 12                   Unions         2  2.82 0.02816901
## 
## $denominator
## [1] 71

How hard is the government working?

responses.full %>%
  group_by(Q3.20) %>%
  summarise(num = n()) %>%
  filter(!is.na(Q3.20)) %>%
  mutate(prop = num / sum(num))
## Source: local data frame [6 x 3]
## 
##             Q3.20   num       prop
##            (fctr) (int)      (dbl)
## 1 Not hard at all    53 0.09636364
## 2    Not too hard   143 0.26000000
## 3   Somewhat hard   220 0.40000000
## 4       Very hard    93 0.16909091
## 5  Extremely hard    19 0.03454545
## 6      Don't know    22 0.04000000

Have NGOs used the TIP report to talk with any of these groups?

cols <- c("Q3.21_1", "Q3.21_2", "Q3.21_3", "Q3.21_4")
labels <- c("National government", "Another government",
            "Other NGOs", "Other")

tip.discuss <- separate.answers.summary(responses.full, cols, labels)
tip.discuss$denominator  # Number of responses
## [1] 402
tip.discuss$df
## Source: local data frame [4 x 4]
## 
##                Answer Responses     %  plot.pct
##                (fctr)     (int) (dbl)     (dbl)
## 1 National government       212 52.74 0.5273632
## 2  Another government        67 16.67 0.1666667
## 3          Other NGOs       313 77.86 0.7786070
## 4               Other        85 21.14 0.2114428
plot.data <- tip.discuss$df %>% 
  arrange(plot.pct) %>%
  mutate(Answer=factor(Answer, levels=Answer, ordered=TRUE))

fig.tip.discuss <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
  geom_bar(aes(y=plot.pct), stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.tip.discuss

Government restrictions and oversight

Do members of the government or ruling party sit on the NGO’s board?

responses.full %>%
  group_by(Q3.27) %>%
  summarise(num = n()) %>%
  filter(!is.na(Q3.27)) %>%
  mutate(prop = num / sum(num))
## Source: local data frame [3 x 3]
## 
##        Q3.27   num       prop
##       (fctr) (int)      (dbl)
## 1         No   475 0.86837294
## 2        Yes    41 0.07495430
## 3 Don't know    31 0.05667276

For those that said yes, is the NGO required to have a government official sit on the board?

responses.full %>%
  group_by(Q3.28) %>%
  summarise(num = n()) %>%
  filter(!is.na(Q3.28)) %>%
  mutate(prop = num / sum(num))
## Source: local data frame [3 x 3]
## 
##        Q3.28   num  prop
##       (fctr) (int) (dbl)
## 1         No    25 0.625
## 2        Yes    13 0.325
## 3 Don't know     2 0.050

How restricted does the NGO feel by the host government?

responses.full %>%
  group_by(Q3.29) %>%
  summarise(num = n()) %>%
  filter(!is.na(Q3.29)) %>%
  mutate(prop = num / sum(num)) %T>%
  {cat("Number of responses:", sum(.$num), "\n")}
## Number of responses: 547
## Source: local data frame [6 x 3]
## 
##                    Q3.29   num       prop
##                   (fctr) (int)      (dbl)
## 1         Not restricted   201 0.36745887
## 2 Very little restricted   118 0.21572212
## 3    A little restricted    51 0.09323583
## 4    Somewhat restricted    93 0.17001828
## 5        Very restricted    41 0.07495430
## 6             Don't know    43 0.07861060

Which countries do NGOs say are restrictive?

responses.full %>%
  filter(Q3.29 %in% c("Somewhat restricted", "Very restricted")) %>%
  group_by(work.country) %>%
  summarise(num = n()) %>%
  arrange(desc(num)) %T>%
  {cat("Number of countries:", nrow(.), "\n")}
## Number of countries: 66
## Source: local data frame [66 x 2]
## 
##                             work.country   num
##                                    (chr) (int)
## 1                                  India    11
## 2                                Nigeria     7
## 3                                  Nepal     6
## 4                               Thailand     6
## 5                               Cambodia     5
## 6  Congo, the Democratic Republic of the     4
## 7                         United Kingdom     4
## 8                              Australia     3
## 9                                   Iraq     3
## 10                                 Italy     3
## ..                                   ...   ...

Explanations of restrictions

# responses.full %>% filter(!is.na(Q3.30)) %>%
#   select(clean.id, Q3.30) %>% View

NGO opinions of US activity

# Select just the columns that have cowcodes embedded in them
active.embassies.raw <- responses.countries %>%
  select(contains("_c", ignore.case=FALSE)) %>%
  mutate_each(funs(as.numeric(levels(.))[.]))  # Convert values to numeric

# Select only the rows where they responded (i.e. not all columns are NA)
num.responses <- active.embassies.raw %>%
  rowwise() %>% do(all.missing = all(!is.na(.))) %>%
  ungroup() %>% mutate(all.missing = unlist(all.missing)) %>%
  summarise(total = sum(all.missing))

# Tidy cowcode columns and summarize most commonly mentioned countries
active.embassies <- active.embassies.raw %>%
  gather(country.raw, num) %>%
  group_by(country.raw) %>% summarise(num = sum(num, na.rm=TRUE)) %>%
  mutate(country.raw = str_replace(country.raw, "Q.*c", ""),
         country = countrycode(country.raw, "cown", "country.name"),
         country = ifelse(country.raw == "2070", "European Union", country)) %>%
  ungroup() %>% mutate(prop = num / num.responses$total,
                       prop.nice = sprintf("%.1f%%", prop * 100))

Which embassies or foreign governments NGOs were reported as active partners in the fight against human trafficking?

active.embassies.top <- active.embassies %>%
  arrange(num) %>% select(-country.raw) %>%
  filter(num > 10) %>%
  mutate(country = factor(country, levels=country, ordered=TRUE)) %>%
  arrange(desc(num))

active.embassies %>% arrange(desc(num)) %>% 
  select(-country.raw) %>% filter(num > 10)
## Source: local data frame [12 x 4]
## 
##      num        country       prop prop.nice
##    (dbl)          (chr)      (dbl)     (chr)
## 1    260  United States 0.76923077     76.9%
## 2     43 United Kingdom 0.12721893     12.7%
## 3     35    Netherlands 0.10355030     10.4%
## 4     30         France 0.08875740      8.9%
## 5     26         Norway 0.07692308      7.7%
## 6     26 European Union 0.07692308      7.7%
## 7     24    Switzerland 0.07100592      7.1%
## 8     21         Sweden 0.06213018      6.2%
## 9     19      Australia 0.05621302      5.6%
## 10    17        Germany 0.05029586      5.0%
## 11    17          Italy 0.05029586      5.0%
## 12    12         Canada 0.03550296      3.6%
nrow(active.embassies)  # Number of countries mentioned
## [1] 64
num.responses$total  # Total responses
## [1] 338
# Most active embassies
# Save Q3.7 to a CSV for hand coding
most.active <- responses.countries %>%
  select(Q3.7) %>%
  filter(!is.na(Q3.7))
write_csv(most.active, path=file.path(PROJHOME, "data", 
                                      "most_active_WILL_BE_OVERWRITTEN.csv"))

# Read in hand-coded CSV
if (file.exists(file.path(PROJHOME, "data", "most_active.csv"))) {
  most.active <- read_csv(file.path(PROJHOME, "data", "most_active.csv"))
} else {
  stop("data/most_active.csv is missing")
}

# Split comma-separated countries, unnest them into multiple rows, and 
# summarize most active countries
most.active.clean <- most.active %>%
  transform(clean = strsplit(clean, ",")) %>%
  unnest(clean) %>%
  mutate(clean = str_trim(clean)) %>%
  group_by(clean) %>%
  summarise(total = n()) %>%
  mutate(prop = total / nrow(most.active),
         prop.nice = sprintf("%.1f%%", prop * 100))

Which countries or embassies have been the most active?

most.active.clean %>% arrange(desc(total))
## Source: local data frame [40 x 4]
## 
##             clean total       prop prop.nice
##             (chr) (int)      (dbl)     (chr)
## 1   United States   188 0.70149254     70.1%
## 2            None    16 0.05970149      6.0%
## 3  European Union    14 0.05223881      5.2%
## 4             All    12 0.04477612      4.5%
## 5     Switzerland     8 0.02985075      3.0%
## 6       Australia     7 0.02611940      2.6%
## 7           Italy     7 0.02611940      2.6%
## 8  United Kingdom     7 0.02611940      2.6%
## 9     Netherlands     6 0.02238806      2.2%
## 10         Norway     6 0.02238806      2.2%
## ..            ...   ...        ...       ...
nrow(most.active.clean) - 1  # Subtract one because of "None"s
## [1] 39

Over the last 10–15 years, has the United States or its embassy been active in the fight against human trafficking in X?

responses.countries$Q3.8 %>% table %>% print %>% prop.table
## .
##         No        Yes Don't know 
##         39        344        150
## .
##         No        Yes Don't know 
## 0.07317073 0.64540338 0.28142589

Side-by-side graph of active countries + most active countries

plot.data <- active.embassies.top %>%
  bind_rows(data_frame(num=0, country=c("All", "None"), 
                       prop=0, prop.nice="")) %>%
  arrange(num) %>%
  mutate(country = factor(country, levels=country, ordered=TRUE))

plot.data.active <- most.active.clean %>%
  filter(clean %in% plot.data$country) %>%
  mutate(country = factor(clean, levels=levels(plot.data$country), ordered=TRUE))

fig.active <- ggplot(plot.data, aes(x=country, y=num)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = prop.nice), size=3.5, hjust=1.3, 
            family="Source Sans Pro Light") + 
  labs(x=NULL, y="Number of times country was mentioned as a partner in anti-TIP work") + 
  scale_y_continuous(breaks=seq(0, max(active.embassies$num), by=25), 
                     trans="reverse", expand = c(.1, .1)) + 
  coord_flip() + 
  theme_clean() + 
  theme(axis.text.y = element_blank(), 
        axis.line.y = element_blank(),
        plot.margin = unit(c(1,0.5,1,1), "lines"))

fig.most.active <- ggplot(plot.data.active, aes(x=country, y=total)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = prop.nice), size=3.5, hjust=-0.3, 
            family="Source Sans Pro Light") + 
  labs(x=NULL, y="Number of times country was mentioned as the most active partner in anti-TIP work") + 
  scale_y_continuous(expand = c(.15, .15)) + 
  coord_flip() + 
  theme_clean() + 
  theme(axis.text.y = element_text(hjust=0.5), 
        axis.line.y = element_blank(),
        plot.margin = unit(c(1,1,1,0), "lines"))
  
fig.embassies <- arrangeGrob(fig.active, fig.most.active, nrow=1)
grid.draw(fig.embassies)

ggsave(fig.embassies, filename=file.path(PROJHOME, "figures", "fig_embassies.pdf"),
       width=5, height=2, units="in", device=cairo_pdf, scale=2.5)
ggsave(fig.embassies, filename=file.path(PROJHOME, "figures", "fig_embassies.png"),
       width=5, height=2, units="in", type="cairo", scale=2.5)

saveRDS(active.embassies, file.path(PROJHOME, "data", "active_embassies.rds"))
saveRDS(most.active.clean, file.path(PROJHOME, "data", "most_active_embassies.rds"))
write_csv(plot.data, 
          file.path(PROJHOME, "data", 
                    "data_figure2_x_active_embassies_plot.csv"))
write_csv(plot.data.active, 
          file.path(PROJHOME, "data", 
                    "data_figure2_x_most_active_embassies_plot.csv"))

Actual US activities

cols <- c("Q3.9_1", "Q3.9_2", "Q3.9_3", "Q3.9_4", "Q3.9_5",
          "Q3.9_6", "Q3.9_7", "Q3.9_8", "Q3.9_9", "Q3.9_10")
labels <- c("Asking for legislation", "Convening conferences or workshops",
            "Raising awareness", "Providing resources or funding",
            "Increasing government attention", "Training government officials",
            "Contributing to a government action plan", "Other", "Don't know",
            "The US has not been involved in trafficking issues")

activities <- separate.answers.summary(responses.countries, cols, labels)
activities$denominator  # Number of responses
## [1] 532
activities$df
## Source: local data frame [10 x 4]
## 
##                                                Answer Responses     %   plot.pct
##                                                (fctr)     (int) (dbl)      (dbl)
## 1                              Asking for legislation       165 31.02 0.31015038
## 2                  Convening conferences or workshops       208 39.10 0.39097744
## 3                                   Raising awareness       214 40.23 0.40225564
## 4                      Providing resources or funding       212 39.85 0.39849624
## 5                     Increasing government attention       217 40.79 0.40789474
## 6                       Training government officials       146 27.44 0.27443609
## 7            Contributing to a government action plan       114 21.43 0.21428571
## 8                                               Other        43  8.08 0.08082707
## 9                                          Don't know        26  4.89 0.04887218
## 10 The US has not been involved in trafficking issues       166 31.20 0.31203008
plot.data <- activities$df %>% 
  mutate(Answer=factor(Answer, levels=rev(labels), ordered=TRUE))

fig.activities <- ggplot(plot.data, aes(x=Answer, y=Responses)) +
  geom_bar(aes(y=plot.pct), stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$plot.pct, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.activities

ggsave(fig.activities, filename=file.path(PROJHOME, "figures", "fig_activities.pdf"),
       width=6.5, height=5, units="in", device=cairo_pdf)
ggsave(fig.activities, filename=file.path(PROJHOME, "figures", "fig_activities.png"),
       width=6.5, height=5, units="in", type="cairo")

NGO opinions of US importance

General importance

plot.data <- responses.full %>%
  group_by(Q3.19) %>%
  summarize(num = n()) %>%
  na.omit() %>%
  mutate(prop = num / sum(num),
         prop.nice = sprintf("%.1f%%", prop * 100),
         Q3.19 = factor(Q3.19, levels=rev(levels(Q3.19)), ordered=TRUE))
plot.data
## Source: local data frame [4 x 4]
## 
##                      Q3.19   num      prop prop.nice
##                     (fctr) (int)     (dbl)     (chr)
## 1     Most important actor   139 0.2662835     26.6%
## 2 Somewhat important actor   182 0.3486590     34.9%
## 3   Not an important actor    68 0.1302682     13.0%
## 4               Don't know   133 0.2547893     25.5%
fig.us_importance <- ggplot(plot.data, aes(x=Q3.19, y=prop)) + 
  geom_bar(stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$num, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.us_importance

Average importance by country

importance.plot <- country.indexes %>%
  filter(num.responses >= 10) %>%
  arrange(importance_score) %>%
  mutate(country_label = factor(work.country, levels=unique(work.country), 
                                labels=paste0(work.country, " (", num.responses, ")"),
                                ordered=TRUE)) 

fig.avg_importance <- ggplot(importance.plot, aes(x=country_label, y=importance_score)) + 
  geom_pointrange(aes(ymax=importance_score + importance_stdev,
                      ymin=importance_score - importance_stdev)) + 
  labs(x="Country (number of responses)", 
       y="Importance of the US in anti-TIP efforts (mean)") + 
  scale_y_discrete(breaks=c(0, 1, 2), labels=c("Not important", "Somewhat important", "Most important")) + 
  coord_flip(ylim=c(0, 2)) + 
  theme_clean() + theme(legend.position="bottom")
fig.avg_importance

ggsave(fig.avg_importance, filename=file.path(PROJHOME, "figures", "fig_avg_importance.pdf"),
       width=6.5, height=3, units="in", device=cairo_pdf)
ggsave(fig.avg_importance, filename=file.path(PROJHOME, "figures", "fig_avg_importance.png"),
       width=6.5, height=3, units="in", type="cairo")

Are opinions of the US’s importance associated with…?

df.importance <- responses.full %>% 
  select(Q3.19, work.country, change_policy, avg_tier, improve_tip, change_policy, 
         importance, received.funding, us.involvement, total.funding, 
         total.freedom, us.hq, time.spent=Q2.1) %>% 
  filter(!is.na(Q3.19)) %>%
  filter(Q3.19 != "Don't know") %>%
  mutate(importance_factor = factor(Q3.19, ordered=FALSE),
         log.total.funding = log1p(total.funding),
         time.spent = as.numeric(time.spent))

Average tier rating

Average tier doesn’t show much because it doesn’t show any changes in time—just how bad the country is in general?

importance.means <- df.importance %>%
  group_by(Q3.19) %>%
  summarize(avg_points = mean(avg_tier, na.rm=TRUE),
            var_points = var(avg_tier, na.rm=TRUE)) %>%
  print
## Source: local data frame [3 x 3]
## 
##                      Q3.19 avg_points var_points
##                     (fctr)      (dbl)      (dbl)
## 1     Most important actor   2.053344  0.1171988
## 2 Somewhat important actor   1.920723  0.2256463
## 3   Not an important actor   1.763046  0.3492590

Plot group means and distributions

fig.importance <- ggplot(df.importance, aes(x=Q3.19, y=avg_tier)) +
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05, show.legend=FALSE) +
  geom_point(data=importance.means, aes(x=Q3.19, y=avg_points), size=5, show.legend=FALSE) + 
  labs(x="Opinion of US importance", y="Average TIP tier rating") + 
  coord_flip() + theme_clean()
fig.importance

Those means appear slightly different from each other. Is that really the case? Check with ANOVA, which assumes homogenous variance across groups. Throw every possible test at it—if null is rejected (p < 0.05 or whatever) then variance is likely heterogenous: (helpful reference)

bartlett.test(avg_tier ~ importance_factor, data=df.importance)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  avg_tier by importance_factor
## Bartlett's K-squared = 30.187, df = 2, p-value = 2.786e-07
car::leveneTest(avg_tier ~ importance_factor, data=df.importance)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   2  15.809 2.521e-07 ***
##       385                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fligner.test(avg_tier ~ importance_factor, data=df.importance)  # Uses median
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  avg_tier by importance_factor
## Fligner-Killeen:med chi-squared = 21.833, df = 2, p-value = 1.816e-05
kruskal.test(avg_tier ~ importance_factor, data=df.importance)  # Nonparametric
## 
##  Kruskal-Wallis rank sum test
## 
## data:  avg_tier by importance_factor
## Kruskal-Wallis chi-squared = 11.561, df = 2, p-value = 0.003087

All of those p-values are tiny, so it’s clear that variance is not the same across groups. However, there’s a rule of thumb (super detailed example) that ANOVA is robust to heterogeneity of variance as long as the largest variance is less than four times the smallest variance.

Given that rule of thumb, the variance here isn’t that much of an issue

df.importance %>% group_by(importance_factor) %>%
  summarise(variance = var(avg_tier, na.rm=TRUE)) %>%
  do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
## 
##      ratio
##      (dbl)
## 1 2.980057

It would be cool to use Bayesian ANOVA to account for non-homogenous variances (see John Kruschke’s evangelizing), since it handles violations of ANOVA assumptions nicely. However, in his example, the ratio of min/max variance is huge, so it does lead to big differences in results:

#
# read_csv("http://www.indiana.edu/~kruschke/DoingBayesianDataAnalysis/Programs/NonhomogVarData.csv") %>%
#   group_by(Group) %>%
#   summarise(variance = var(Y)) %>%
#   do(data_frame(ratio = max(.$variance) / min(.$variance)))
#   # ratio = 64
#

With the variance issue handled, run the ANOVA:

importance.aov <- aov(avg_tier ~ importance_factor, data=df.importance)
summary(importance.aov)
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## importance_factor   2   3.98  1.9911   9.559 8.88e-05 ***
## Residuals         385  80.19  0.2083                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness

There is some significant difference between groups. Look at pairwise comparisons between all the groups to (kind of) decompose that finding:

(importance.pairs <- TukeyHSD(importance.aov, "importance_factor"))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = avg_tier ~ importance_factor, data = df.importance)
## 
## $importance_factor
##                                                       diff        lwr          upr     p adj
## Somewhat important actor-Most important actor   -0.1326211 -0.2537216 -0.011520667 0.0278636
## Not an important actor-Most important actor     -0.2902984 -0.4492046 -0.131392159 0.0000647
## Not an important actor-Somewhat important actor -0.1576772 -0.3104069 -0.004947536 0.0412011

Plot the differences:

importance.pairs.plot <- data.frame(importance.pairs$importance_factor) %>%
  mutate(pair = row.names(.),
         pair = factor(pair, levels=pair, ordered=TRUE),
         stars = add.stars(p.adj))

fig.importance.pairs <- ggplot(importance.pairs.plot, 
                               aes(x=pair, y=diff, ymax=upr, ymin=lwr)) + 
  geom_hline(yintercept=0) + 
  geom_text(aes(label=stars), nudge_x=0.25) +
  geom_pointrange() + 
  theme_clean() + coord_flip()
fig.importance.pairs

Another way of checking group means in non-homogenous data is to use ordinal logistic regression. Here’s an ordered logit and corresponding predicted probabilities:

model.importance <- ordinal::clm(Q3.19 ~ avg_tier, data=df.importance, 
                                 link="logit", Hess=TRUE)
summary(model.importance)
## formula: Q3.19 ~ avg_tier
## data:    df.importance
## 
##  link  threshold nobs logLik  AIC    niter max.grad cond.H 
##  logit flexible  388  -389.83 785.66 5(0)  2.96e-12 1.9e+02
## 
## Coefficients:
##          Estimate Std. Error z value Pr(>|z|)    
## avg_tier  -0.9033     0.2117  -4.266 1.99e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##                                                 Estimate Std. Error z value
## Most important actor|Somewhat important actor    -2.3618     0.4328  -5.457
## Somewhat important actor|Not an important actor  -0.1425     0.4109  -0.347
## (1 observation deleted due to missingness)
# Predicted probabilities
newdata <- data_frame(avg_tier = seq(0, 3, 0.1))
pred.importance <- predict(model.importance, newdata, interval=TRUE)

# Create plot data
pred.plot.lower <- cbind(newdata, pred.importance$lwr) %>%
  gather(importance, lwr, -c(1:ncol(newdata)))
pred.plot.upper <- cbind(newdata, pred.importance$upr) %>%
  gather(importance, upr, -c(1:ncol(newdata)))

pred.plot.data <- cbind(newdata, pred.importance$fit) %>%
  gather(importance, importance_prob, -c(1:ncol(newdata))) %>%
  left_join(pred.plot.lower, by=c("avg_tier", "importance")) %>%
  left_join(pred.plot.upper, by=c("avg_tier", "importance"))

importance.colors <- c("grey20", "grey40", "grey60", "grey80")
ggplot(pred.plot.data, aes(x=avg_tier, y=importance_prob)) +  
  geom_ribbon(aes(ymax=upr, ymin=lwr, fill=importance), 
              alpha=0.2) + 
  geom_line(aes(colour=importance), size=2) + 
  scale_y_continuous(labels=percent) + 
  labs(x="Average tier rating in country", 
       y="Predicted probability of assigning importance") + 
  # scale_fill_manual(values=importance.colors, name=NULL) + 
  # scale_colour_manual(values=importance.colors, name=NULL) +
  theme_clean()

Improvement in TIP scores

Opinions of importance are not related to changes in TIP score. The average Improvement in TIP rating is the same for each possible answer of importance.

ggplot(df.importance, aes(x=Q3.19, y=improve_tip)) + 
  geom_violin(fill="grey90") + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Improvement in TIP tier rating") + 
  coord_flip() + theme_clean()

Variance is equal in all groups:

kruskal.test(improve_tip ~ importance_factor, data=df.importance)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  improve_tip by importance_factor
## Kruskal-Wallis chi-squared = 0.058986, df = 2, p-value = 0.9709

ANOVA shows no differences:

change.anova <- aov(improve_tip ~ importance_factor, data=df.importance) 
summary(change.anova)
##                    Df Sum Sq Mean Sq F value Pr(>F)
## importance_factor   2   0.13 0.06264   0.202  0.817
## Residuals         385 119.35 0.30999               
## 1 observation deleted due to missingness
TukeyHSD(change.anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = improve_tip ~ importance_factor, data = df.importance)
## 
## $importance_factor
##                                                       diff        lwr       upr     p adj
## Somewhat important actor-Most important actor   0.03306968 -0.1146676 0.1808069 0.8583270
## Not an important actor-Most important actor     0.04517562 -0.1486830 0.2390342 0.8474172
## Not an important actor-Somewhat important actor 0.01210595 -0.1742176 0.1984295 0.9871998

Change in Cho scores

Opinions of importance vary slightly by changes in Cho policy scores. Respondents who indicated that the US was more important tended to work in countries with greater changes in TIP policy.

ggplot(df.importance, aes(x=Q3.19, y=change_policy)) + 
  geom_violin(fill="grey90") + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Change in TIP policy index") + 
  coord_flip() + theme_clean()

Variance is equal in all groups:

kruskal.test(change_policy ~ importance_factor, data=df.importance)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  change_policy by importance_factor
## Kruskal-Wallis chi-squared = 4.2577, df = 2, p-value = 0.119

ANOVA shows no differences

cho.change.anova <- aov(change_policy ~ importance_factor, data=df.importance) 
summary(cho.change.anova)  # (⌐○Ϟ○)  ♥  \(•◡•)/
##                    Df Sum Sq Mean Sq F value Pr(>F)  
## importance_factor   2   32.1  16.050   2.376 0.0943 .
## Residuals         385 2600.6   6.755                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
TukeyHSD(cho.change.anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = change_policy ~ importance_factor, data = df.importance)
## 
## $importance_factor
##                                                       diff       lwr        upr     p adj
## Somewhat important actor-Most important actor   -0.3665885 -1.056234 0.32305659 0.4241547
## Not an important actor-Most important actor     -0.8270207 -1.731963 0.07792109 0.0813204
## Not an important actor-Somewhat important actor -0.4604322 -1.330200 0.40933546 0.4271368

US funding received by the responding organization

Organizations that have been received funding from the US are more likely to consider the US to play an important role in the countries they work in.

funding.table <- df.importance %>%
  xtabs(~ importance_factor + received.funding, .) %>% print
##                           received.funding
## importance_factor          FALSE TRUE
##   Most important actor        81   58
##   Somewhat important actor   149   33
##   Not an important actor      65    3

There’s an overall significant difference (though two of the cells are really small here)

(funding.chi <- chisq.test(funding.table))
## 
##  Pearson's Chi-squared test
## 
## data:  funding.table
## X-squared = 41.487, df = 2, p-value = 9.798e-10
# Cramer's V for standardized measure of association
assocstats(funding.table)
##                     X^2 df   P(> X^2)
## Likelihood Ratio 44.434  2 2.2450e-10
## Pearson          41.487  2 9.7985e-10
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.31 
## Cramer's V        : 0.327
# Components of chi-squared
(components <- funding.chi$residuals^2)
##                           received.funding
## importance_factor               FALSE       TRUE
##   Most important actor      5.6532084 17.7414518
##   Somewhat important actor  0.8734059  2.7410080
##   Not an important actor    3.4985820 10.9795925
round(1-pchisq(components, funding.chi$parameter), 3)
##                           received.funding
## importance_factor          FALSE  TRUE
##   Most important actor     0.059 0.000
##   Somewhat important actor 0.646 0.254
##   Not an important actor   0.174 0.004
# Visualize differences
mosaic(funding.table,
       labeling_args=list(set_varnames=c(received.funding="Received US funding", 
                                         importance_factor="Opinion of US"),
                          gp_labels=(gpar(fontsize=8))), 
       gp_varnames=gpar(fontsize=10, fontface=2))

US funding received by the country the organization works in

Opinions of importance are strongly associated with US TIP funding given to a country. Organizations are more likely to think the US is an important actor if they work in countries receiving more anti-TIP funding.

ggplot(df.importance, aes(x=Q3.19, y=log.total.funding)) + 
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05) + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Total TIP funding to country (logged)") + 
  scale_y_continuous(labels=trans_format("exp", dollar_format())) +
  coord_flip() + theme_clean()

Variance is not equal in all groups:

kruskal.test(log.total.funding ~ importance_factor, data=df.importance)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  log.total.funding by importance_factor
## Kruskal-Wallis chi-squared = 13.789, df = 2, p-value = 0.001014

Ratio is 3ish, which is below 4, so heterogenous variance is okayish:

df.importance %>% group_by(importance_factor) %>%
  summarise(variance = var(log.total.funding, na.rm=TRUE)) %>%
  do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
## 
##      ratio
##      (dbl)
## 1 3.006739

ANOVA shows significant differences:

funding.anova <- aov(log.total.funding ~ importance_factor, data=df.importance) 
summary(funding.anova)
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## importance_factor   2    864   432.0   14.17 1.16e-06 ***
## Residuals         377  11491    30.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 9 observations deleted due to missingness
(funding.pairs <- TukeyHSD(funding.anova))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = log.total.funding ~ importance_factor, data = df.importance)
## 
## $importance_factor
##                                                      diff       lwr        upr     p adj
## Somewhat important actor-Most important actor   -1.241784 -2.728912  0.2453442 0.1224274
## Not an important actor-Most important actor     -4.368679 -6.305285 -2.4320735 0.0000006
## Not an important actor-Somewhat important actor -3.126895 -4.977403 -1.2763873 0.0002474

See those differences

funding.pairs.plot <- data.frame(funding.pairs$importance_factor) %>%
  mutate(pair = row.names(.),
         pair = factor(pair, levels=pair, ordered=TRUE),
         stars = add.stars(p.adj))

fig.funding.pairs <- ggplot(funding.pairs.plot, 
                            aes(x=pair, y=diff, ymax=upr, ymin=lwr)) + 
  geom_hline(yintercept=0) + 
  geom_text(aes(label=stars), nudge_x=0.25) +
  geom_pointrange() + 
  theme_clean() + coord_flip()
fig.funding.pairs

Democracy (Freedom House political rights + civil liberties)

US importance appears to be associated with the level of democracy in a country. NGOs working in countries with worse democracy (higher numbers of the total freedom scale) are more likely to see the US as the most important anti-TIP actor in that country. Or, rather, on average total freedom is worse in countries where NGOs indicate the US as the most important actor.

fig.importance.freedom <- ggplot(df.importance, aes(x=Q3.19, y=total.freedom)) + 
  # geom_violin(fill="grey90") + 
  geom_point(alpha=0.05, size=0.25) + 
  geom_point(stat="summary", fun.y="mean", size=2) + 
  scale_y_continuous(breaks=seq(2, 14, by=4)) + 
  labs(x=NULL,
       y="Total freedom (political rights + civil liberties; higher is worse)") + 
  coord_flip() + theme_clean(6)
fig.importance.freedom

Variance is not equal in all groups:

kruskal.test(total.freedom ~ importance_factor, data=df.importance)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  total.freedom by importance_factor
## Kruskal-Wallis chi-squared = 12.832, df = 2, p-value = 0.001635

Ratio between min and max variance is low, so we’re okay:

df.importance %>% group_by(importance_factor) %>%
  summarise(variance = var(total.freedom, na.rm=TRUE)) %>%
  do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
## 
##      ratio
##      (dbl)
## 1 1.431773

ANOVA shows significant differences:

democracy.anova <- aov(total.freedom ~ importance_factor, data=df.importance) 
summary(democracy.anova)
##                    Df Sum Sq Mean Sq F value  Pr(>F)   
## importance_factor   2    125   62.52   5.961 0.00282 **
## Residuals         381   3996   10.49                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 5 observations deleted due to missingness
(democracy.pairs <- TukeyHSD(democracy.anova))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = total.freedom ~ importance_factor, data = df.importance)
## 
## $importance_factor
##                                                       diff       lwr         upr     p adj
## Somewhat important actor-Most important actor   -0.8469545 -1.713460  0.01955082 0.0570213
## Not an important actor-Most important actor     -1.6001193 -2.733199 -0.46703938 0.0028051
## Not an important actor-Somewhat important actor -0.7531648 -1.836941  0.33061187 0.2321337

View the differences:

democracy.pairs.plot <- data.frame(democracy.pairs$importance_factor) %>%
  mutate(pair = row.names(.),
         pair = factor(pair, levels=pair, ordered=TRUE),
         stars = add.stars(p.adj))

fig.democracy.pairs <- ggplot(democracy.pairs.plot, 
                            aes(x=pair, y=diff, ymax=upr, ymin=lwr)) + 
  geom_hline(yintercept=0) + 
  geom_text(aes(label=stars), nudge_x=0.25) +
  geom_pointrange() + 
  theme_clean() + coord_flip()
fig.democracy.pairs

Time spent on trafficking

The time NGOs spend on trafficking issues does not appear to be associated with their opinion of US importance.

ggplot(df.importance, aes(x=Q3.19, y=time.spent)) + 
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05) + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Time spent on trafficking issues") + 
  coord_flip() + theme_clean()

Variance is not equal in all groups:

kruskal.test(time.spent ~ importance_factor, data=df.importance)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  time.spent by importance_factor
## Kruskal-Wallis chi-squared = 5.698, df = 2, p-value = 0.0579

Ratio between min and max variance is low, so we’re okay:

df.importance %>% group_by(importance_factor) %>%
  summarise(variance = var(time.spent, na.rm=TRUE)) %>%
  do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
## 
##      ratio
##      (dbl)
## 1 1.325085

ANOVA shows some small overall signifcant differences, but when decomposed, that effect is coming only from the tiny “Don’t know-Somewhat important actor” difference.

time.anova <- aov(time.spent ~ importance_factor, data=df.importance) 
summary(time.anova)
##                    Df Sum Sq Mean Sq F value Pr(>F)  
## importance_factor   2   5923    2961    2.85 0.0591 .
## Residuals         366 380278    1039                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 20 observations deleted due to missingness
(time.pairs <- TukeyHSD(time.anova))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = time.spent ~ importance_factor, data = df.importance)
## 
## $importance_factor
##                                                       diff         lwr      upr     p adj
## Somewhat important actor-Most important actor    8.5081361  -0.2300681 17.24634 0.0582672
## Not an important actor-Most important actor      7.6875000  -3.8109852 19.18599 0.2585089
## Not an important actor-Somewhat important actor -0.8206361 -11.9540844 10.31281 0.9835503

Interaction with the US

Organizations that have been involved with the US are more likely to consider the US to play an important role in the countries they work in.

involvement.table <- df.importance %>%
  xtabs(~ importance_factor + us.involvement, .) %>% print
##                           us.involvement
## importance_factor          FALSE TRUE
##   Most important actor        23  116
##   Somewhat important actor    48  134
##   Not an important actor      38   30

There’s an overall significant difference

(involvement.chi <- chisq.test(involvement.table))
## 
##  Pearson's Chi-squared test
## 
## data:  involvement.table
## X-squared = 35.49, df = 2, p-value = 1.965e-08
# Cramer's V for standardized measure of association
assocstats(involvement.table)
##                     X^2 df   P(> X^2)
## Likelihood Ratio 33.424  2 5.5220e-08
## Pearson          35.490  2 1.9655e-08
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.289 
## Cramer's V        : 0.302
# Components of chi-squared
(components <- involvement.chi$residuals^2)
##                           us.involvement
## importance_factor                FALSE        TRUE
##   Most important actor      6.53059391  2.54226691
##   Somewhat important actor  0.17617716  0.06858325
##   Not an important actor   18.83865808  7.33362047
1-pchisq(components, involvement.chi$parameter)
##                           us.involvement
## importance_factor                 FALSE         TRUE
##   Most important actor     3.818559e-02 2.805135e-01
##   Somewhat important actor 9.156798e-01 9.662897e-01
##   Not an important actor   8.114044e-05 2.555786e-02
# Visualize differences
mosaic(involvement.table,
       labeling_args=list(set_varnames=c(us.involvement="US involvement", 
                                         importance_factor="Opinion of US"),
                          gp_labels=(gpar(fontsize=8))), 
       gp_varnames=gpar(fontsize=10, fontface=2))

Headquarters in the US

NGOs with headquarters in the US are not significantly different from their foreign counterparts in their opinions of the importance of the US.

hq.table <- df.importance %>%
  xtabs(~ importance_factor + us.hq, .) %>% print
##                           us.hq
## importance_factor          FALSE TRUE
##   Most important actor       129   10
##   Somewhat important actor   171   11
##   Not an important actor      59    9

There’s no overall significant difference

(hq.chi <- chisq.test(hq.table))
## 
##  Pearson's Chi-squared test
## 
## data:  hq.table
## X-squared = 3.6785, df = 2, p-value = 0.1589
# Cramer's V is really low
assocstats(hq.table)
##                     X^2 df P(> X^2)
## Likelihood Ratio 3.2576  2  0.19617
## Pearson          3.6785  2  0.15894
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.097 
## Cramer's V        : 0.097
# Components of chi-squared
(components <- hq.chi$residuals^2)
##                           us.hq
## importance_factor                FALSE        TRUE
##   Most important actor     0.004038845 0.048331515
##   Somewhat important actor 0.054876241 0.656685688
##   Not an important actor   0.224774722 2.689804174
1-pchisq(components, hq.chi$parameter)
##                           us.hq
## importance_factor              FALSE      TRUE
##   Most important actor     0.9979826 0.9761239
##   Somewhat important actor 0.9729349 0.7201161
##   Not an important actor   0.8936980 0.2605652
# Visualize differences
mosaic(hq.table,
       labeling_args=list(set_varnames=c(us.involvement="US involvement", 
                                         importance_factor="Opinion of US"),
                          gp_labels=(gpar(fontsize=8))), 
       gp_varnames=gpar(fontsize=10, fontface=2))

NGO opinions of US positivity

General positivity

Important caveat: Respondents were only asked about their opinions of the US’s work (Q3.25) if they indicated that the US was a somewhat important actor or the most important actor (Q3.19)

df.positivity <- responses.full %>% 
  select(Q3.25=Q3.25_collapsed, work.country, change_policy, avg_tier, 
         improve_tip, change_policy, importance, received.funding, us.involvement, 
         total.funding, total.freedom, us.hq, time.spent=Q2.1) %>% 
  filter(!is.na(Q3.25)) %>%
  filter(Q3.25 != "Don't know") %>%
  mutate(positivity.factor = factor(Q3.25, ordered=FALSE),
         log.total.funding = log1p(total.funding),
         time.spent = as.numeric(time.spent))

plot.data <- df.positivity %>%
  group_by(Q3.25) %>%
  summarize(num = n()) %>%
  na.omit() %>%
  mutate(prop = num / sum(num),
         prop.nice = sprintf("%.1f%%", prop * 100),
         Q3.25 = factor(Q3.25, levels=rev(levels(df.positivity$positivity.factor)), 
                        ordered=TRUE))
plot.data
## Source: local data frame [2 x 4]
## 
##      Q3.25   num      prop prop.nice
##     (fctr) (int)     (dbl)     (chr)
## 1    Mixed    64 0.2302158     23.0%
## 2 Positive   214 0.7697842     77.0%
fig.us_positivity <- ggplot(plot.data, aes(x=Q3.25, y=prop)) + 
  geom_bar(stat="identity") + 
  labs(x=NULL, y=NULL) + 
  scale_y_continuous(labels = percent, 
                     breaks = seq(0, max(round(plot.data$num, 1)), by=0.1)) + 
  coord_flip() + theme_clean()
fig.us_positivity

Average positivity by country

positivity.plot <- country.indexes %>%
  filter(num.responses >= 10,
         positivity_score > 0) %>%
  arrange(positivity_score) %>%
  mutate(country_label = factor(work.country, levels=unique(work.country), 
                                labels=paste0(work.country, " (", num.responses, ")"),
                                ordered=TRUE)) 

fig.avg_positivity <- ggplot(positivity.plot, aes(x=country_label, y=positivity_score)) + 
  geom_pointrange(aes(ymax=positivity_score + positivity_stdev,
                      ymin=positivity_score - positivity_stdev)) + 
  labs(x="Country (number of responses)", 
       y="Positivity of the US in anti-TIP efforts (mean)") + 
  scale_y_discrete(breaks=c(-1, 0, 1), labels=c("Negative", "Mixed", "Positive")) + 
  coord_flip(ylim=c(-1, 1)) + 
  theme_clean() + theme(legend.position="bottom")
fig.avg_positivity

ggsave(fig.avg_positivity, filename=file.path(PROJHOME, "figures", "fig_avg_positivity.pdf"),
       width=6.5, height=3, units="in", device=cairo_pdf)
ggsave(fig.avg_positivity, filename=file.path(PROJHOME, "figures", "fig_avg_positivity.png"),
       width=6.5, height=3, units="in", type="cairo")

Are opinions of the US’s positivity associated with…?

Average tier rating

ggplot(df.positivity, aes(x=positivity.factor, y=avg_tier)) +
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05, show.legend=FALSE) +
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Average TIP tier rating") + 
  coord_flip() + theme_clean()

t.test(avg_tier ~ positivity.factor, data=df.positivity)
## 
##  Welch Two Sample t-test
## 
## data:  avg_tier by positivity.factor
## t = 2.6264, df = 122.22, p-value = 0.009732
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03403966 0.24237667
## sample estimates:
##    mean in group Mixed mean in group Positive 
##               2.089290               1.951082

Improvement in TIP scores

NGOs who have positive opinions of the US are more likely to work in countries where the TIP rating has (slightly) decreased on average between 2000 and 2015. This may be because assigning a worse TIP rating to a country represents increased US diplomatic and economic pressure—it is a possible sign that NGOs like scorecard diplomacy.

ggplot(df.positivity, aes(x=positivity.factor, y=improve_tip)) + 
  geom_violin(fill="grey90") + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Improvement in TIP tier rating") + 
  coord_flip() + theme_clean()

Difference is significant

t.test(improve_tip ~ positivity.factor, data=df.positivity)
## 
##  Welch Two Sample t-test
## 
## data:  improve_tip by positivity.factor
## t = 4.0354, df = 113.16, p-value = 9.942e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1632050 0.4780069
## sample estimates:
##    mean in group Mixed mean in group Positive 
##              0.1328125             -0.1877934

Change in Cho scores

In contrast to the changes in actual TIP scores, NGOs that work in countries that show greater improvement in overall TIP policies are more likely to have a positive opinion of the US, perhaps because they are happy about the actual on-the-ground improvements.

ggplot(df.positivity, aes(x=positivity.factor, y=change_policy)) + 
  geom_violin(fill="grey90") + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Change in TIP policy index") + 
  coord_flip() + theme_clean()

Difference is significant

t.test(change_policy ~ positivity.factor, data=df.positivity)
## 
##  Welch Two Sample t-test
## 
## data:  change_policy by positivity.factor
## t = -4.0789, df = 125.67, p-value = 7.978e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.9747917 -0.6845393
## sample estimates:
##    mean in group Mixed mean in group Positive 
##               1.656250               2.985915
df.positivity %>% group_by(positivity.factor) %>%
  summarise(variance = var(change_policy, na.rm=TRUE)) %>%
  do(data_frame(ratio = max(.$variance) / min(.$variance)))
## Source: local data frame [1 x 1]
## 
##      ratio
##      (dbl)
## 1 1.514886

US funding received by the responding organization

funding.table.pos <- df.positivity %>%
  xtabs(~ positivity.factor + received.funding, .) %>% print
##                  received.funding
## positivity.factor FALSE TRUE
##          Mixed       52   12
##          Positive   147   67

Distribution isn’t significantly different

(funding.chi.pos <- chisq.test(funding.table.pos))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  funding.table.pos
## X-squared = 3.2273, df = 1, p-value = 0.07242
# Cramer's V for standardized measure of association
assocstats(funding.table.pos)
##                     X^2 df P(> X^2)
## Likelihood Ratio 4.0576  1 0.043973
## Pearson          3.8197  1 0.050654
## 
## Phi-Coefficient   : 0.117 
## Contingency Coeff.: 0.116 
## Cramer's V        : 0.117
# Components of chi-squared
(components <- funding.chi.pos$residuals^2)
##                  received.funding
## positivity.factor     FALSE      TRUE
##          Mixed    0.8355627 2.1047719
##          Positive 0.2498879 0.6294645
round(1-pchisq(components, funding.chi.pos$parameter), 3)
##                  received.funding
## positivity.factor FALSE  TRUE
##          Mixed    0.361 0.147
##          Positive 0.617 0.428
# Visualize differences
mosaic(funding.table.pos,
       labeling_args=list(set_varnames=c(received.funding="Received US funding", 
                                         positivity.factor="Opinion of US"),
                          gp_labels=(gpar(fontsize=8))), 
       gp_varnames=gpar(fontsize=10, fontface=2))

US funding received by the country the organization works in

ggplot(df.positivity, aes(x=positivity.factor, y=log.total.funding)) + 
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05) + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Total TIP funding to country (logged)") + 
  scale_y_continuous(labels=trans_format("exp", dollar_format())) +
  coord_flip() + theme_clean()

Difference is significant

t.test(log.total.funding ~ positivity.factor, data=df.positivity)
## 
##  Welch Two Sample t-test
## 
## data:  log.total.funding by positivity.factor
## t = 2.4693, df = 140.65, p-value = 0.01473
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2859705 2.5825944
## sample estimates:
##    mean in group Mixed mean in group Positive 
##               14.84245               13.40817

Democracy (Freedom House political rights + civil liberties)

ggplot(df.positivity, aes(x=positivity.factor, y=total.freedom)) + 
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05) + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", 
       y="Total freedom (political rights + civil liberties; higher is worse)") + 
  coord_flip() + theme_clean()

Difference is not quite significant

t.test(total.freedom ~ positivity.factor, data=df.positivity)
## 
##  Welch Two Sample t-test
## 
## data:  total.freedom by positivity.factor
## t = 1.9191, df = 114.74, p-value = 0.05746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02554065  1.61296045
## sample estimates:
##    mean in group Mixed mean in group Positive 
##               7.139456               6.345746

Time spent on trafficking

The time NGOs spend on trafficking issues does not appear to be associated with their opinion of US importance.

ggplot(df.positivity, aes(x=positivity.factor, y=time.spent)) + 
  geom_violin(fill="grey90") + 
  geom_point(alpha=0.05) + 
  geom_point(stat="summary", fun.y="mean", size=5) + 
  labs(x="Opinion of US", y="Time spent on trafficking issues") + 
  coord_flip() + theme_clean()

Difference is no significant:

t.test(time.spent ~ positivity.factor, data=df.positivity)
## 
##  Welch Two Sample t-test
## 
## data:  time.spent by positivity.factor
## t = -0.40173, df = 98.698, p-value = 0.6888
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.022685   7.310983
## sample estimates:
##    mean in group Mixed mean in group Positive 
##               56.21311               58.06897

Interaction with the US

involvement.table.pos <- df.positivity %>%
  xtabs(~ positivity.factor + us.involvement, .) %>% print
##                  us.involvement
## positivity.factor FALSE TRUE
##          Mixed       16   48
##          Positive    45  169

There’s no significant difference

(involvement.chi.pos <- chisq.test(involvement.table.pos))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  involvement.table.pos
## X-squared = 0.25152, df = 1, p-value = 0.616
# Tiny Cramer's V
assocstats(involvement.table.pos)
##                      X^2 df P(> X^2)
## Likelihood Ratio 0.44403  1  0.50518
## Pearson          0.45379  1  0.50054
## 
## Phi-Coefficient   : 0.04 
## Contingency Coeff.: 0.04 
## Cramer's V        : 0.04
# Components of chi-squared
(components <- involvement.chi.pos$residuals^2)
##                  us.involvement
## positivity.factor      FALSE       TRUE
##          Mixed    0.27267366 0.07665020
##          Positive 0.08154726 0.02292342
1-pchisq(components, involvement.chi.pos$parameter)
##                  us.involvement
## positivity.factor     FALSE      TRUE
##          Mixed    0.6015439 0.7818894
##          Positive 0.7752115 0.8796564
# Visualize differences
mosaic(involvement.table.pos,
       labeling_args=list(set_varnames=c(us.involvement="US involvement", 
                                         positivity.factor="Opinion of US"),
                          gp_labels=(gpar(fontsize=8))), 
       gp_varnames=gpar(fontsize=10, fontface=2))

Headquarters in the US

NGOs with headquarters in the US are not significantly different from their foreign counterparts in their opinions of the US in general.

hq.table.pos <- df.positivity %>%
  xtabs(~ positivity.factor + us.hq, .) %>% print
##                  us.hq
## positivity.factor FALSE TRUE
##          Mixed       59    5
##          Positive   201   13

There’s no overall significant difference, but some of the cells are too small

(hq.chi.pos <- chisq.test(hq.table.pos))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hq.table.pos
## X-squared = 0.042508, df = 1, p-value = 0.8367
# Cramer's V is really, really low
assocstats(hq.table.pos)
##                      X^2 df P(> X^2)
## Likelihood Ratio 0.23579  1  0.62726
## Pearson          0.24567  1  0.62014
## 
## Phi-Coefficient   : 0.03 
## Contingency Coeff.: 0.03 
## Cramer's V        : 0.03
# Components of chi-squared
(components <- hq.chi.pos$residuals^2)
##                  us.hq
## positivity.factor       FALSE        TRUE
##          Mixed    0.012244916 0.176871003
##          Positive 0.003662031 0.052896001
1-pchisq(components, hq.chi.pos$parameter)
##                  us.hq
## positivity.factor     FALSE      TRUE
##          Mixed    0.9118885 0.6740762
##          Positive 0.9517457 0.8180985
# Visualize differences
mosaic(hq.table.pos,
       labeling_args=list(set_varnames=c(us.involvement="US involvement", 
                                         positivity.factor="Opinion of US"),
                          gp_labels=(gpar(fontsize=8))), 
       gp_varnames=gpar(fontsize=10, fontface=2))

Miscellaneous questions

Correlation between US funding and US cooperation

What is the correlation between answering yes to funding and yes to direct cooperation in Q3.18? Are the same organizations answering the question and are the questions capturing the same thing?

# Convert all Q3.18 responses to 0/1 if they answered at least one of the options
contact.with.us <- responses.full %>% select(contains("Q3.18")) %>%
  rowwise() %>% do(answered.something = !all(is.na(.))) %>%
  ungroup() %>% mutate(answered.something = unlist(answered.something)) %>%
  bind_cols(select(responses.full, clean.id, contains("Q3.18"))) %>%
  mutate(direct.contact = ifelse(answered.something & is.na(Q3.18_1), 0, Q3.18_1),
         direct.cooperation = ifelse(answered.something & is.na(Q3.18_2), 0, Q3.18_2),
         received.funding = ifelse(answered.something & is.na(Q3.18_3), 0, Q3.18_3),
         other = ifelse(answered.something & is.na(Q3.18_4), 0, Q3.18_4),
         nothing = ifelse(answered.something & is.na(Q3.18_5), 0, Q3.18_5),
         dont.know = ifelse(answered.something & is.na(Q3.18_6), 0, Q3.18_6)) %>%
  filter(answered.something) %>% select(-c(answered.something, contains("Q3.18")))

None of the responses are that corrleated. Contact, cooperation, and funding are all positively correlated at around 0.32-0.46, and having no contact is negatively correlated with those three (as would be expected). They’re not really measuring the same thing, though.

This is also evident with the Chronbach’s α for contact, cooperation, and funding, which is 0.66. A hypothetical index varaible combining those three variables would not be very internally consistent.

cor.matrix <- as.data.frame(cor(select(contact.with.us, -clean.id))) %>%
  mutate(variable = row.names(.),
         variable = factor(variable, levels=rev(variable), ordered=TRUE))

cor.plot <- cor.matrix %>% gather(variable2, value, -variable) %>%
  mutate(clean.value = round(value, 2))

ggplot(cor.plot, aes(x=variable2, variable, fill=value)) + 
  geom_tile() + 
  geom_text(aes(label=clean.value), size=3, hjust=0.5, color="black",
            family="Source Sans Pro Semibold") + 
  labs(x=NULL, y=NULL, fill="Correlation") + 
  scale_fill_gradient2(midpoint=0, low="#91bfdb", mid="#ffffbf", high="#fc8d59") +
  coord_fixed() +
  theme_clean(12) + theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1),
                          panel.grid=element_blank())

contact.with.us %>% 
  select(direct.contact, direct.cooperation, received.funding) %>% 
  as.data.frame %>%  # Because psych::alpha chokes on data_frames
  psych::alpha()
## 
## Reliability analysis   
## Call: psych::alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.66      0.67    0.58       0.4   2 0.047 0.29 0.33
## 
##  lower alpha upper     95% confidence boundaries
## 0.56 0.66 0.75 
## 
##  Reliability if an item is dropped:
##                    raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## direct.contact          0.63      0.63    0.46      0.46 1.73    0.068
## direct.cooperation      0.47      0.48    0.32      0.32 0.92    0.075
## received.funding        0.59      0.59    0.42      0.42 1.47    0.070
## 
##  Item statistics 
##                      n raw.r std.r r.cor r.drop mean   sd
## direct.contact     534  0.79  0.75  0.53   0.43 0.49 0.50
## direct.cooperation 534  0.79  0.81  0.67   0.54 0.21 0.40
## received.funding   534  0.73  0.77  0.57   0.45 0.19 0.39
## 
## Non missing response frequency for each item
##                       0    1 miss
## direct.contact     0.51 0.49    0
## direct.cooperation 0.79 0.21    0
## received.funding   0.81 0.19    0

Maps of countries where the US has been active

countries.where.active <- responses.full %>%
  select(survey.id, work.country, work.iso3) %>%
  left_join(select(responses.countries, survey.id, Q3.6_c2), by="survey.id") %>%
  filter(Q3.6_c2 == 1) %>%
  group_by(work.iso3) %>%
  summarise(num = n()) %>%
  ungroup() %>%
  right_join(possible.countries, by=c("work.iso3"="id")) %>%
  mutate(presence = ifelse(is.na(num) | num < 1, FALSE, TRUE))

Gradient scale

ggplot(countries.where.active, aes(fill=num, map_id=work.iso3)) +
  geom_map(map=countries.ggmap, size=0.15, colour="black") + 
  expand_limits(x=countries.ggmap$long, y=countries.ggmap$lat) + 
  coord_equal() +
  scale_fill_gradient(low="grey95", high="grey20", breaks=seq(2, 16, 4), 
                      na.value="#FFFFFF", name="NGOs reporting US activity",
                      guide=guide_colourbar(ticks=FALSE, barwidth=6)) + 
  theme_blank_map() +
  theme(legend.position="bottom", legend.key.size=unit(0.65, "lines"),
        strip.background=element_rect(colour="#FFFFFF", fill="#FFFFFF"))

Binary

ggplot(countries.where.active, aes(fill=presence, map_id=work.iso3)) +
  geom_map(map=countries.ggmap, size=0.15, colour="black") + 
  expand_limits(x=countries.ggmap$long, y=countries.ggmap$lat) + 
  coord_equal() +
  scale_fill_manual(values=c("#FFFFFF", "grey50"), na.value="#FFFFFF", guide=FALSE) +
  theme_blank_map()

TODOs and other questions

Does opinion of the US vary by: * Whether an NGO focuses on certain types of work?

In which countries does the US seem to have had more collaboration with NGOs?

CHECK: Opinions are not driven by cooptation - look at chapter 1 for boomerang type stuff - cooptation by donors - so in this case, the NGOs aren’t just being bought out?

Drop “don’t know”s - that leads to a t test for positivity

Create a nice simple summary table (like the one from Mike Ward’s paper) with graphs

# # Type of work
# # TODO: work.country is not the most reliable identifier---there be NAs
# # Q2.2_X
# asdf <- responses.full %>% 
#   select(survey.id, matches("Q2.2_\\d$")) %>%
#   gather(type_of_work, value, -survey.id) %>%
#   mutate(type_of_work = factor(type_of_work, levels=paste0("Q2.2_", seq(1:4)), 
#                                labels=c("Organs", "Sex", "Labor", "Other"), 
#                                ordered=TRUE))
# asdf %>%
#   group_by(type_of_work) %>%
#   summarise(bloop = n(),
#             derp = sum(value, na.rm=TRUE),
#             asdf = derp / n())
# # Should be 30, 408, 294, 116 with 479 total?

# #' Does NGO experience on the ground match improvements reported by the State Department?

# #' * Find country averages of government improvement, etc. - then show that X
# #'   number of countries show improvement, etc. 
# #' * Report by organization and by country - how many countries has the US had 
# #'   a positive influence + how many NGOs say the US has had a positive 
# #'   influence
# full <- left_join(country.indexes, tip.change,
#                   by=c("work.country" = "countryname")) %>%
#   filter(num.responses >= 10) %>%
#   mutate(country_label = ifelse(num.responses >= 10, work.country, ""))

# ggplot(full, aes(x=improvement_score, y=change_policy, label=work.country)) + 
#   geom_point() + geom_text(vjust=1.5) +
#   geom_hline(yintercept=0) + 
#   scale_x_continuous(limits=c(0, 1)) + 
#   scale_y_continuous(limits=c(-2, 6))

# ggplot(country.indexes, aes(x=work.country, y=improvement_score)) + 
#   geom_bar(stat="identity") + 
#   coord_flip()

# #' Compare improvement scores with actual changes in TIP score to get a sense
# #' of if NGO experiences reflect changes in rankings

# ggplot(country.indexes, aes(x=work.country, y=positivity_score)) + 
#   geom_bar(stat="identity") + 
#   coord_flip()


# # Importance opinions
# importance.opinions <- responses.all %>%
#   filter(Q3.19 == "Not an important actor") %>%
#   select(survey.id, clean.id, Q3.19, contains("TEXT"), Q4.1)

# responses.all$Q3.19 %>%
#   table %>% print %>% prop.table


# responses.countries %>% 
#   xtabs(~ Q3.25 + Q3.26, .)

# ggplot(responses.orgs, aes(x = Q1.5.factor)) + geom_bar() + 
#   labs(x = Hmisc::label(responses.orgs$Q1.5))


# # Importance of US
# asdf <- responses.all %>% 
#   select(clean.id, Q1.2, Q3.8, Q3.6, Q3.7)

# inconsistent.no <- c(1020, 1152, 1226, 1267, 1323, 1405, 1515)
# inconsistent.dont.know <- c(1051, 1512)

# qwer <- asdf %>%
#   mutate(us.active = ifelse(clean.id %in% c(inconsistent.no, inconsistent.dont.know),
#                             "Yes", as.character(Q3.8)))

# qwer$us.active %>% table %>% print %>% prop.table

# sdfg <- qwer %>% group_by(clean.id) %>% 
#   summarize(said.no = ifelse(any(us.active == "No", na.rm=TRUE), TRUE, FALSE))
# sdfg$said.no %>% table %>% print %>% prop.table




# # US importance and positivity



# # Importance of report 
# responses.orgs$Q2.5 %>% table %>% print %>% prop.table
# responses.countries$Q3.23 %>% table %>% print %>% prop.table

# heard.of.tip <- responses.countries %>% 
#   left_join(responses.orgs, by="survey.id") %>%
#   filter(Q2.5 == "Yes") %>%
#   group_by(survey.id) %>%
#   mutate(know.score = ifelse(Q3.22 == "Don't know", FALSE, TRUE)) %>%
#   select(know.score) %>% unique

# heard.of.tip$know.score %>% table %>% print %>% prop.table


# # Opinions of report
# opinions <- responses.all %>% 
#   select(clean.id, Q1.2, home.country, work.country, Q3.21_1, Q3.21_4_TEXT, Q3.24.Text)

# not.used.tip.ids <- c(1094, 1099, 1106, 1114, 1157, 1221, 1244, 1269, 
#                       1314, 1330, 1354, 1357, 1393, 1425)
# not.used.tip <- responses.all %>%
#   mutate(no.response = ifelse(is.na(Q3.21_1) & is.na(Q3.21_2) & 
#                                 is.na(Q3.21_3) & is.na(Q3.21_4), TRUE, FALSE),
#          explicit.no = ifelse(clean.id %in% not.used.tip.ids, TRUE, FALSE)) %>%
#   select(clean.id, Q1.2, Q3.21_1, Q3.21_2, Q3.21_3, Q3.21_4, no.response, explicit.no) %>%
#   group_by(clean.id) %>%
#   summarize(no.response = ifelse(sum(no.response) > 0, TRUE, FALSE),
#             explicit.no = ifelse(sum(explicit.no) > 0, TRUE, FALSE))

# not.used.tip$explicit.no %>% table
▲ Back to top ▲