library(tidyverse)
library(crackdownsphilanthropy)
library(csvy)
library(here)
# Load data
# results_file <- file.path(here(), "data", "test_data.csv")
results_file <- here("data", "raw_data", "crackdowns_philanthropy_raw.csv")
# Possible answers
favorability <- c("Very unfavorable", "Unfavorable", "Neutral",
"Favorable", "Very favorable")
likelihood <- c("Extremely unlikely", "Somewhat unlikely",
"Neither likely nor unlikely",
"Somewhat likely", "Extremely likely")
frequency_charity <- c("Once a week", "Once a month", "Once every three months",
"Once every six months", "Once a year", "Once every few years",
"Never")
frequency_public_affairs <- c("Most of the time", "Some of the time",
"Only now and then", "Hardly at all")
levels_ideology <- c("Strong liberal", "Liberal", "Independent, leaning liberal",
"Independent", "Independent, leaning conservative",
"Conservative", "Very conservative")
levels_education <- c("Less than high school", "High school graduate",
"Some college", "2 year degree", "4 year degree",
"Graduate or professional degree", "Doctorate")
frequency_religion <- c("More than once a week", "Once a week", "Once or twice a month",
"A few times a year", "Seldom", "Never", "Don't know")
levels_gender <- c("Female", "Male", "Transgender", "Other", "Prefer not to say")
levels_income <- c("Less than $10,000", "$10,000 – $19,999", "$20,000 – $29,999",
"$30,000 – $39,999", "$40,000 – $49,999", "$50,000 – $59,999",
"$60,000 – $69,999", "$70,000 – $79,999", "$80,000 – $89,999",
"$90,000 – $99,999", "$100,000 – $149,999", "More than $150,000",
"Prefer not to say")
levels_age <- c("Under 18", "18 – 24", "25 – 34", "35 – 44", "45 – 54",
"55 – 64", "65 – 74", "75 – 84", "85 or older")
attention1_answer <- c("Green", "Yellow")
attention2_answer <- "Blue"
levels_attention <- c("Correct", "Incorrect")
# Qualtrics stores question information in the first 3 rows. All we care about
# are the column names in the first row.
results_meta <- read_csv(results_file, n_max = 2)
# The first 10 rows need to be skipped because the consent question has a bunch
# of line breaks and it messes with CSV line counting. The actual responses
# start on row 11. This doesn't entirely make sense, since read_csv can read the
# line break-filled row in just fine. Skipping said lines makes it choke. <shrug>
results_raw <- read_csv(results_file, skip = 16,
col_names = colnames(results_meta))
# Confirmation codes to exclude. These are people who failed the attention
# checks or who took the survey outside of MTurk.
#
# The code for determining these is in a script that is excluded from the main
# repository because of privacy issues (it deals directly with MTurk worker
# IDs), called "data/private_data/approve_mturkers.R". It outputs this text list
# of codes.
codes_to_exclude <- read_csv(here("data", "derived_data", "codes_to_exclude.csv"))
# Clean everything
results_check_attention <- results_raw %>%
# Only select necessary columns
select(id = ResponseId, confirmation_code = mTurkCode, duration = `Duration (in seconds)`,
start_date = StartDate, end_date = EndDate,
crackdown, issue, funding, starts_with("Q")) %>%
rename(favor_humanitarian = Q2.1_1, favor_human_rights = Q2.1_2,
favor_development = Q2.1_3, donate_likely = Q2.3,
amount_donate = Q2.4_1, amount_keep = Q2.4_2, amount_why = Q2.5,
give_charity = Q3.2, volunteer = Q3.3, political_knowledge = Q3.4,
ideology = Q3.5, education = Q3.6, religiosity = Q3.7,
gender = Q3.9, gender_other = Q3.9_4_TEXT, income = Q3.10, age = Q3.11) %>%
# Clean up experimental condition columns
mutate(issue = recode(issue, `human rights for refugees` = "Human rights",
`humanitarian assistance for refugees` = "Humanitarian assistance"),
funding = recode(funding, `government donors` = "Government",
`individual, private donors` = "Private")) %>%
mutate(crackdown = ifelse(str_detect(crackdown, "harshly restrict"),
"Crackdown", "No crackdown")) %>%
mutate(crackdown = factor(crackdown, levels = c("No crackdown", "Crackdown"),
ordered = TRUE),
issue = factor(issue, levels = c("Human rights", "Humanitarian assistance"),
ordered = TRUE),
funding = factor(funding, levels = c("Government", "Private"),
ordered = TRUE)) %>%
# Attention checks
mutate(attention1_correct = str_split(Q1.3, ",") %>%
map_lgl(~ all(attention1_answer %in% .) & length(.) == length(attention1_answer)),
attention2_correct = Q3.8 == attention2_answer) %>%
# This person contacted me separately to say that they accidentally did the first
# attention check wrong, but that they did pay attention: 6518634
mutate(attention1_correct = ifelse(confirmation_code == 6518634,
TRUE, attention1_correct))
results <- results_check_attention %>%
filter(!(confirmation_code %in% codes_to_exclude$confirmation_code)) %>%
filter(attention2_correct) %>%
# Factorize variables
mutate(donate_likely = factor(donate_likely, levels = likelihood, ordered = TRUE)) %>%
mutate_at(vars(favor_humanitarian, favor_human_rights, favor_development),
list(~factor(., levels = favorability, ordered = TRUE))) %>%
mutate(give_charity = factor(give_charity,
levels = frequency_charity, ordered = TRUE),
volunteer = factor(volunteer, levels = c("No", "Yes"), ordered = TRUE),
political_knowledge = factor(political_knowledge,
levels = frequency_public_affairs, ordered = TRUE),
ideology = factor(ideology, levels = levels_ideology, ordered = TRUE),
education = factor(education, levels = levels_education, ordered = TRUE),
religiosity = factor(religiosity, levels = frequency_religion, ordered = TRUE),
gender = recode(gender, `Other:` = "Other"),
gender = factor(gender, levels = levels_gender, ordered = TRUE),
income = factor(income, levels = levels_income, ordered = TRUE),
age = factor(age, levels = levels_age, ordered = TRUE),
check1 = factor(attention1_correct, levels = c(TRUE, FALSE),
labels = levels_attention, ordered = TRUE),
check2 = factor(attention2_correct, levels = c(TRUE, FALSE),
labels = levels_attention, ordered = TRUE)) %>%
# Dichotomize variables
mutate(donate_likely_bin = fct_recode(donate_likely,
`Not likely` = "Extremely unlikely",
`Not likely` = "Somewhat unlikely",
`Not likely` = "Neither likely nor unlikely",
Likely = "Somewhat likely",
Likely = "Extremely likely")) %>%
mutate_at(vars(favor_humanitarian, favor_human_rights, favor_development),
list(bin = ~fct_recode(.,
`Not favorable` = "Very unfavorable",
`Not favorable` = "Unfavorable",
`Not favorable` = "Neutral",
Favorable = "Favorable",
Favorable = "Very favorable"))) %>%
mutate(give_charity_3 = fct_recode(give_charity,
`At least once a month` = "Once a week",
`At least once a month` = "Once a month",
`Once a month-once a year` = "Once every three months",
`Once a month-once a year` = "Once every six months",
`Once a month-once a year` = "Once a year",
Rarely = "Once every few years",
Rarely = "Never") %>% fct_rev(),
give_charity_2 = fct_recode(give_charity,
`At least once a year` = "Once a week",
`At least once a year` = "Once a month",
`At least once a year` = "Once every three months",
`At least once a year` = "Once every six months",
`At least once a year` = "Once a year",
Rarely = "Once every few years",
Rarely = "Never")) %>%
mutate(political_knowledge_bin = fct_recode(political_knowledge,
`Often` = "Most of the time",
`Often` = "Some of the time",
`Not often` = "Only now and then",
`Not often` = "Hardly at all") %>% fct_rev()) %>%
mutate(ideology_3 = fct_recode(ideology,
`Liberal` = "Strong liberal",
`Liberal` = "Liberal",
`Liberal` = "Independent, leaning liberal",
`Independent` = "Independent",
`Conservative` = "Independent, leaning conservative",
`Conservative` = "Conservative",
`Conservative` = "Very conservative") %>% fct_rev()) %>%
mutate(ideology_bin = fct_recode(ideology_3,
`Not liberal` = "Independent",
`Not liberal` = "Conservative")) %>%
mutate(education_bin = fct_recode(education,
`No BA` = "Less than high school",
`No BA` = "High school graduate",
`No BA` = "Some college",
`No BA` = "2 year degree",
`BA and above` = "4 year degree",
`BA and above` = "Graduate or professional degree",
`BA and above` = "Doctorate")) %>%
mutate(religiosity_bin = fct_recode(religiosity,
`At least once a month` = "More than once a week",
`At least once a month` = "Once a week",
`At least once a month` = "Once or twice a month",
`Rarely` = "A few times a year",
`Rarely` = "Seldom",
`Rarely` = "Never",
NULL = "Don't know") %>% fct_rev()) %>%
mutate(income_clean = fct_recode(income, NULL = "Prefer not to say"),
income_bin = factor(income_clean >= median(.$income), levels = c(FALSE, TRUE),
labels = paste(c("Less than", "At least"), median(.$income)))) %>%
mutate(age_bin = factor(age >= median(.$age), levels = c(FALSE, TRUE),
labels = paste(c("Less than", "At least"), median(.$age))),
gender_bin = fct_collapse(gender,
Female = "Female",
`Not Female` = c("Male", "Transgender",
"Other", "Prefer not to say")))
# Save final clean data
saveRDS(results, here("data", "derived_data", "results_clean.rds"))
write_csvy(results, file = here("data", "derived_data", "results_clean.csv"),
metadata = here("data", "derived_data", "results_clean.yaml"), na = "NA")
# Save information about completion rates
completed_skeleton <- results_check_attention %>%
left_join(codes_to_exclude, by = "confirmation_code") %>%
replace_na(list(reason = "Approved")) %>%
expand(reason, crackdown, issue, funding)
completed_summary <- results_check_attention %>%
left_join(codes_to_exclude, by = "confirmation_code") %>%
replace_na(list(reason = "Approved")) %>%
count(reason, crackdown, issue, funding) %>%
right_join(completed_skeleton, by = c("reason", "crackdown", "issue", "funding")) %>%
replace_na(list(n = 0))
saveRDS(completed_summary, here("data", "derived_data", "completion_summary.rds"))
write_csv(completed_summary, here("data", "derived_data", "completion_summary.csv"))
## # http://dirk.eddelbuettel.com/blog/2017/11/27/#011_faster_package_installation_one
## VER=
## CCACHE=ccache
## CC=$(CCACHE) gcc$(VER)
## CXX=$(CCACHE) g++$(VER)
## CXX11=$(CCACHE) g++$(VER)
## CXX14=$(CCACHE) g++$(VER)
## FC=$(CCACHE) gfortran$(VER)
## F77=$(CCACHE) gfortran$(VER)
##
## CXX14FLAGS=-O3 -march=native -mtune=native -fPIC
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.2 (2020-06-22)
## os macOS Catalina 10.15.6
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-10-01
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
## backports 1.1.9 2020-08-24 [1] CRAN (R 4.0.2)
## base64enc 0.1-3 2015-07-28 [1] CRAN (R 4.0.0)
## blob 1.2.1 2020-01-20 [1] CRAN (R 4.0.0)
## broom 0.7.0 2020-07-09 [1] CRAN (R 4.0.2)
## callr 3.4.3 2020-03-28 [1] CRAN (R 4.0.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.0.0)
## cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.0)
## codetools 0.2-16 2018-12-24 [1] CRAN (R 4.0.2)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 4.0.0)
## crackdownsphilanthropy * 0.9 2020-10-01 [1] local
## crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.0)
## csvy * 0.3.0 2018-08-01 [1] CRAN (R 4.0.2)
## curl 4.3 2019-12-02 [1] CRAN (R 4.0.0)
## data.table 1.13.0 2020-07-24 [1] CRAN (R 4.0.2)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 4.0.0)
## dbplyr 1.4.4 2020-05-27 [1] CRAN (R 4.0.2)
## desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.0)
## devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
## digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.0)
## dplyr * 1.0.2 2020-08-18 [1] CRAN (R 4.0.2)
## ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.0)
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 4.0.0)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.0.2 2018-11-29 [1] CRAN (R 4.0.0)
## ggplot2 * 3.3.2 2020-06-19 [1] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gridExtra 2.3 2017-09-09 [1] CRAN (R 4.0.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.0)
## haven 2.3.1 2020-06-01 [1] CRAN (R 4.0.2)
## here * 0.1 2017-05-28 [1] CRAN (R 4.0.0)
## hms 0.5.3 2020-01-08 [1] CRAN (R 4.0.0)
## htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.0)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
## inline 0.3.15 2018-05-18 [1] CRAN (R 4.0.0)
## jsonlite 1.7.0 2020-06-25 [1] CRAN (R 4.0.2)
## knitr 1.29 2020-06-23 [1] CRAN (R 4.0.2)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.0)
## loo 2.3.1 2020-07-14 [1] CRAN (R 4.0.2)
## lubridate 1.7.9 2020-06-08 [1] CRAN (R 4.0.2)
## magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.0)
## matrixStats 0.56.0 2020-03-13 [1] CRAN (R 4.0.0)
## memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.0)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.0.2)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.0)
## pillar 1.4.6 2020-07-10 [1] CRAN (R 4.0.2)
## pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
## pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.2)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.0)
## processx 3.4.3 2020-07-05 [1] CRAN (R 4.0.0)
## ps 1.3.4 2020-08-11 [1] CRAN (R 4.0.2)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
## R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.0)
## Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.2)
## RcppParallel 5.0.2 2020-06-24 [1] CRAN (R 4.0.2)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 4.0.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 4.0.0)
## remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
## reprex 0.3.0 2019-05-16 [1] CRAN (R 4.0.0)
## rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
## rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.2)
## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.0)
## rstan 2.21.2 2020-07-27 [1] CRAN (R 4.0.2)
## rstudioapi 0.11 2020-02-07 [1] CRAN (R 4.0.0)
## rvest 0.3.6 2020-07-25 [1] CRAN (R 4.0.2)
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
## StanHeaders 2.21.0-6 2020-08-16 [1] CRAN (R 4.0.2)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
## testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.0)
## tibble * 3.0.3 2020-07-10 [1] CRAN (R 4.0.2)
## tidyr * 1.1.2 2020-08-27 [1] CRAN (R 4.0.2)
## tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.0)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 4.0.0)
## usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.0)
## V8 3.2.0 2020-06-19 [1] CRAN (R 4.0.2)
## vctrs 0.3.4 2020-08-29 [1] CRAN (R 4.0.2)
## withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.0)
## xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library