RCT

library(tidyverse)
library(targets)
library(modelsummary)
library(tinytable)
library(ggdist)
library(scales)
library(ggdag)
library(dagitty)
library(parameters)

tar_config_set(
  store = here::here('_targets'),
  script = here::here('_targets.R')
)

rct <- tar_read(data_rct)

# Plotting functions
invisible(list2env(tar_read(graphic_functions), .GlobalEnv))
set_annotation_fonts()

gof_map <- tribble(
  ~raw,            ~clean,               ~fmt, ~omit,
  "nobs",          "N",                  0,    FALSE,
  "r.squared",     "$R^2$",          2,    FALSE,
  "adj.r.squared", "\\(R^2\\) adjusted", 2,    FALSE
)

DAG

coords <- tribble(
  ~name, ~x,   ~y,   ~type,         ~color,         ~label,
  "X",   1,    3,    "Treatment",   clrs$Prism[4],  "Experiential\nphilanthropy",
  "Y",   3,    3,    "Outcome",     clrs$Prism[2],  "Charitable\nbehavior",
  "Z1",  5/3,  4.2,  "Confounder",  clrs$Prism[6],  "Social awareness",
  "Z2",  7/3,  4.2,  "Confounder",  clrs$Prism[6],  "Age",
  "Z3",  5/3,  5,    "Confounder",  clrs$Prism[6],  "GPA",
  "Z4",  7/3,  5,    "Confounder",  clrs$Prism[6],  "Income",
  "M",   2,    2,    "Mediator",    clrs$Prism[3],  "Community\nconnections",
  "C",   2,    1,    "Collider",    clrs$Prism[8],  "Employment in\nnonprofit sector"
) |> 
  mutate(type = fct_inorder(type))

data_full_dag <- dagify(
  Y ~ X + Z1 + Z2 + Z3 + Z4 + M,
  M ~ X,
  C ~ X + Y,
  coords = coords,
  labels = pull(coords, label, name = name)
) |> 
  tidy_dagitty() |> 
  left_join(select(coords, name, type, color), by = join_by(name)) |> 
  mutate(arrow_color = case_when(
    name == "X" & to == "Y" ~ "black",
    to == "C" ~ clrs$Prism[8],
    to == "M" | (name == "M" & to == "Y") ~ clrs$Prism[3],
    .default = color
  ))

ggplot(data_full_dag, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_dag_edges(edge_width = 0.5) +
  geom_dag_edges(aes(edge_color = arrow_color), edge_width = 0.5) +
  geom_dag_point(aes(color = type), size = 12) +
  geom_dag_label(
    data = filter(data_full_dag, type != "Confounder"), aes(label = label), 
    size = 4, nudge_y = -0.46, color = "black", lineheight = 1
  ) +
  geom_dag_label(
    data = filter(data_full_dag, type == "Confounder"), aes(label = label), 
    size = 4, nudge_y = 0.35, color = "black", lineheight = 1
  ) +
  scale_color_manual(
    values = coords |> distinct(type, color) |> pull(color, name = type),
    guide = guide_legend(
      title = NULL, override.aes = list(size = 6), label.position = "bottom"
    )
  ) +
  coord_cartesian(xlim = c(0.98, 3.02)) +
  theme_np_dag() + 
  theme(legend.position = "top")

EDA

Confounders

ggplot(rct, aes(x = social_awareness, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.2
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Social awareness index", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'


ggplot(rct, aes(x = age, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.4
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Age", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'


ggplot(rct, aes(x = gpa, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.4
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "GPA", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'


ggplot(rct, aes(x = income, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.4
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_x_continuous(labels = label_dollar()) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Income", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(rct, aes(x = social_awareness, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


ggplot(rct, aes(x = age, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


ggplot(rct, aes(x = gpa, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


ggplot(rct, aes(x = income, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Mediator

rct |> 
  mutate(experiential_learning_fct = as.logical(experiential_learning)) |> 
  ggplot(aes(x = experiential_learning_fct, y = community_connections)) +
  geom_point(position = position_jitter(height = 0)) +
  geom_boxplot(aes(group = experiential_learning_fct))


ggplot(rct, aes(x = community_connections, y = prosocial_index)) +
  geom_point() +
  labs(x = "Community connections", y = "Prosocial index")

Collider

rct |> 
  mutate(experiential_learning_fct = as.logical(experiential_learning)) |> 
  mutate(employed_nonprofit_fct = as.logical(employed_nonprofit)) |> 
  ggplot(aes(x = experiential_learning_fct, y = employed_nonprofit_fct)) +
  geom_point(position = position_jitter())


rct |> 
  mutate(employed_nonprofit_fct = as.logical(employed_nonprofit)) |> 
  ggplot(aes(x = prosocial_index, y = employed_nonprofit_fct)) +
  geom_point(position = position_jitter(width = 0))

Model comparisons

model_correct <- lm(
  prosocial_index ~ experiential_learning, 
  data = rct
)

model_adjusted <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income,
  data = rct
)

model_with_mediator <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income + community_connections, 
  data = rct
)

model_collider_selection <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income, 
  data = filter(rct, employed_nonprofit == 1)
)

model_collider_post_treatment <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income + employed_nonprofit, 
  data = rct
)

models <- list(
  "Treatment only" = model_correct,
  "Covariates included" = model_adjusted,
  "Mediator included" = model_with_mediator,
  "Collider (selection)" = model_collider_selection,
  "Collider (post-treatment covariate)" = model_collider_post_treatment
)
modelsummary(
  models,
  statistic = "[{conf.low}, {conf.high}]",
  fmt = 2,
  gof_map = gof_map,
  add_columns = tibble("True effect" = c(NA, NA, 10)) |> magrittr::set_attr("position", 2)
) |> 
  style_tt(
    i = 3:4, j = 1:7, 
    background = clrs$Prism[6], color = "#ffffff", bold = TRUE
  )
True effect Treatment only Covariates included Mediator included Collider (selection) Collider (post-treatment covariate)
(Intercept) 84.90 32.44 29.43 59.89 40.89
[84.28, 85.51] [28.35, 36.53] [25.58, 33.28] [55.67, 64.10] [37.09, 44.70]
experiential_learning 10.00 10.05 10.38 7.92 5.16 6.53
[9.18, 10.92] [9.75, 11.01] [7.24, 8.61] [4.47, 5.86] [5.82, 7.24]
social_awareness 0.35 0.35 0.20 0.28
[0.33, 0.37] [0.33, 0.37] [0.18, 0.22] [0.26, 0.30]
age 0.56 0.56 0.33 0.46
[0.43, 0.69] [0.44, 0.68] [0.21, 0.45] [0.34, 0.57]
gpa 2.58 2.66 1.95 2.12
[1.99, 3.16] [2.11, 3.20] [1.39, 2.51] [1.58, 2.65]
income 0.00 0.00 0.00 0.00
[0.00, 0.00] [0.00, 0.00] [0.00, 0.00] [0.00, 0.00]
community_connections 0.97
[0.83, 1.11]
employed_nonprofit 6.72
[5.97, 7.48]
N 1328 1328 1328 735 1328
$R^2$ 0.28 0.63 0.68 0.40 0.70
\(R^2\) adjusted 0.28 0.63 0.67 0.40 0.70
modelsummary(
  models,
  statistic = "[{conf.low}, {conf.high}]",
  fmt = 2,
  coef_map = c("experiential_learning"),
  gof_map = gof_map,
  add_columns = tibble("True effect" = 10) |> magrittr::set_attr("position", 2)
)
True effect Treatment only Covariates included Mediator included Collider (selection) Collider (post-treatment covariate)
experiential_learning 10.00 10.05 10.38 7.92 5.16 6.53
[9.18, 10.92] [9.75, 11.01] [7.24, 8.61] [4.47, 5.86] [5.82, 7.24]
N 1328 1328 1328 735 1328
$R^2$ 0.28 0.63 0.68 0.40 0.70
\(R^2\) adjusted 0.28 0.63 0.67 0.40 0.70
treatment_effects <- enframe(models) |> 
  mutate(params = map(value, \(x) model_parameters(x))) |> 
  unnest(params) |> 
  filter(Parameter == "experiential_learning") |> 
  mutate(name = factor(name, levels = names(models)))

ggplot(treatment_effects, aes(x = Coefficient, y = fct_rev(name))) +
  geom_vline(xintercept = 10, color = clrs$Prism[2]) +
  geom_pointrange(aes(xmin = CI_low, xmax = CI_high)) +
  labs(x = "Treatment effect", y = NULL) +
  theme_np(base_size = 16) +
  theme(panel.grid.major.y = element_blank())