RCT

library(tidyverse)
library(targets)
library(modelsummary)
library(tinytable)
library(ggdist)
library(scales)
library(ggdag)
library(dagitty)
library(parameters)

tar_config_set(
  store = here::here('_targets'),
  script = here::here('_targets.R')
)

rct <- tar_read(data_rct)

# Plotting functions
invisible(list2env(tar_read(graphic_functions), .GlobalEnv))
set_annotation_fonts()

gof_map <- tribble(
  ~raw,            ~clean,               ~fmt, ~omit,
  "nobs",          "N",                  0,    FALSE,
  "r.squared",     "$R^2$",          2,    FALSE,
  "adj.r.squared", "\\(R^2\\) adjusted", 2,    FALSE
)

DAG

coords <- tribble(
  ~name, ~x,   ~y,   ~type,         ~color,         ~label,
  "X",   1,    3,    "Treatment",   clrs$Prism[4],  "Experiential\nphilanthropy",
  "Y",   3,    3,    "Outcome",     clrs$Prism[2],  "Charitable\nbehavior",
  "Z1",  5/3,  4.2,  "Confounder",  clrs$Prism[6],  "Social awareness",
  "Z2",  7/3,  4.2,  "Confounder",  clrs$Prism[6],  "Age",
  "Z3",  5/3,  5,    "Confounder",  clrs$Prism[6],  "GPA",
  "Z4",  7/3,  5,    "Confounder",  clrs$Prism[6],  "Income",
  "M",   2,    2,    "Mediator",    clrs$Prism[3],  "Community\nconnections",
  "C",   2,    1,    "Collider",    clrs$Prism[8],  "Employment in\nnonprofit sector"
) |> 
  mutate(type = fct_inorder(type))

data_full_dag <- dagify(
  Y ~ X + Z1 + Z2 + Z3 + Z4 + M,
  M ~ X,
  C ~ X + Y,
  coords = coords,
  labels = pull(coords, label, name = name)
) |> 
  tidy_dagitty() |> 
  left_join(select(coords, name, type, color), by = join_by(name)) |> 
  mutate(arrow_color = case_when(
    name == "X" & to == "Y" ~ "black",
    to == "C" ~ clrs$Prism[8],
    to == "M" | (name == "M" & to == "Y") ~ clrs$Prism[3],
    .default = color
  ))

ggplot(data_full_dag, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_dag_edges(edge_width = 0.5) +
  geom_dag_edges(aes(edge_color = arrow_color), edge_width = 0.5) +
  geom_dag_point(aes(color = type), size = 12) +
  geom_dag_label(
    data = filter(data_full_dag, type != "Confounder"), aes(label = label), 
    size = 4, nudge_y = -0.46, color = "black", lineheight = 1
  ) +
  geom_dag_label(
    data = filter(data_full_dag, type == "Confounder"), aes(label = label), 
    size = 4, nudge_y = 0.35, color = "black", lineheight = 1
  ) +
  scale_color_manual(
    values = coords |> distinct(type, color) |> pull(color, name = type),
    guide = guide_legend(
      title = NULL, override.aes = list(size = 6), label.position = "bottom"
    )
  ) +
  coord_cartesian(xlim = c(0.98, 3.02)) +
  theme_np_dag() + 
  theme(legend.position = "top")

EDA

Confounders

ggplot(rct, aes(x = social_awareness, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.2
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Social awareness index", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'


ggplot(rct, aes(x = age, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.4
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Age", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'


ggplot(rct, aes(x = gpa, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.4
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "GPA", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'


ggplot(rct, aes(x = income, y = experiential_learning)) +
  geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
    pch = 19, color = "grey20", scale = 0.4
  ) +
  geom_smooth(
    method = "glm", method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  scale_x_continuous(labels = label_dollar()) +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Income", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(rct, aes(x = social_awareness, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


ggplot(rct, aes(x = age, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


ggplot(rct, aes(x = gpa, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


ggplot(rct, aes(x = income, y = prosocial_index)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Mediator

rct |> 
  mutate(experiential_learning_fct = as.logical(experiential_learning)) |> 
  ggplot(aes(x = experiential_learning_fct, y = community_connections)) +
  geom_point(position = position_jitter(height = 0)) +
  geom_boxplot(aes(group = experiential_learning_fct))


ggplot(rct, aes(x = community_connections, y = prosocial_index)) +
  geom_point() +
  labs(x = "Community connections", y = "Prosocial index")

Collider

rct |> 
  mutate(experiential_learning_fct = as.logical(experiential_learning)) |> 
  mutate(employed_nonprofit_fct = as.logical(employed_nonprofit)) |> 
  ggplot(aes(x = experiential_learning_fct, y = employed_nonprofit_fct)) +
  geom_point(position = position_jitter())


rct |> 
  mutate(employed_nonprofit_fct = as.logical(employed_nonprofit)) |> 
  ggplot(aes(x = prosocial_index, y = employed_nonprofit_fct)) +
  geom_point(position = position_jitter(width = 0))

Model comparisons

model_correct <- lm(
  prosocial_index ~ experiential_learning, 
  data = rct
)

model_adjusted <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income,
  data = rct
)

model_with_mediator <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income + community_connections, 
  data = rct
)

model_collider_selection <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income, 
  data = filter(rct, employed_nonprofit == 1)
)

model_collider_post_treatment <- lm(
  prosocial_index ~ experiential_learning + social_awareness + age + gpa + income + employed_nonprofit, 
  data = rct
)

models <- list(
  "Treatment only" = model_correct,
  "Covariates included" = model_adjusted,
  "Mediator included" = model_with_mediator,
  "Collider (selection)" = model_collider_selection,
  "Collider (post-treatment covariate)" = model_collider_post_treatment
)

modelsummary(
  models,
  statistic = "[{conf.low}, {conf.high}]",
  fmt = 2,
  gof_map = gof_map,
  add_columns = tibble("True effect" = c(NA, NA, 10)) |> magrittr::set_attr("position", 2)
) |> 
  style_tt(
    i = 3:4, j = 1:7, 
    background = clrs$Prism[6], color = "#ffffff", bold = TRUE
  )

	True effect	Treatment only	Covariates included	Mediator included	Collider (selection)	Collider (post-treatment covariate)
(Intercept)		84.90	32.44	29.43	59.89	40.89
		[84.28, 85.51]	[28.35, 36.53]	[25.58, 33.28]	[55.67, 64.10]	[37.09, 44.70]
experiential_learning	10.00	10.05	10.38	7.92	5.16	6.53
		[9.18, 10.92]	[9.75, 11.01]	[7.24, 8.61]	[4.47, 5.86]	[5.82, 7.24]
social_awareness			0.35	0.35	0.20	0.28
			[0.33, 0.37]	[0.33, 0.37]	[0.18, 0.22]	[0.26, 0.30]
age			0.56	0.56	0.33	0.46
			[0.43, 0.69]	[0.44, 0.68]	[0.21, 0.45]	[0.34, 0.57]
gpa			2.58	2.66	1.95	2.12
			[1.99, 3.16]	[2.11, 3.20]	[1.39, 2.51]	[1.58, 2.65]
income			0.00	0.00	0.00	0.00
			[0.00, 0.00]	[0.00, 0.00]	[0.00, 0.00]	[0.00, 0.00]
community_connections				0.97
				[0.83, 1.11]
employed_nonprofit						6.72
						[5.97, 7.48]
N		1328	1328	1328	735	1328
$R^2$		0.28	0.63	0.68	0.40	0.70
$R^2$ adjusted		0.28	0.63	0.67	0.40	0.70

modelsummary(
  models,
  statistic = "[{conf.low}, {conf.high}]",
  fmt = 2,
  coef_map = c("experiential_learning"),
  gof_map = gof_map,
  add_columns = tibble("True effect" = 10) |> magrittr::set_attr("position", 2)
)

	True effect	Treatment only	Covariates included	Mediator included	Collider (selection)	Collider (post-treatment covariate)
experiential_learning	10.00	10.05	10.38	7.92	5.16	6.53
		[9.18, 10.92]	[9.75, 11.01]	[7.24, 8.61]	[4.47, 5.86]	[5.82, 7.24]
N		1328	1328	1328	735	1328
$R^2$		0.28	0.63	0.68	0.40	0.70
$R^2$ adjusted		0.28	0.63	0.67	0.40	0.70

treatment_effects <- enframe(models) |> 
  mutate(params = map(value, \(x) model_parameters(x))) |> 
  unnest(params) |> 
  filter(Parameter == "experiential_learning") |> 
  mutate(name = factor(name, levels = names(models)))

ggplot(treatment_effects, aes(x = Coefficient, y = fct_rev(name))) +
  geom_vline(xintercept = 10, color = clrs$Prism[2]) +
  geom_pointrange(aes(xmin = CI_low, xmax = CI_high)) +
  labs(x = "Treatment effect", y = NULL) +
  theme_np(base_size = 16) +
  theme(panel.grid.major.y = element_blank())