library(tidyverse)
library(targets)
library(modelsummary)
library(tinytable)
library(marginaleffects)
library(ggdist)
library(scales)
library(WeightIt)
library(ggdag)
library(dagitty)
library(parameters)
tar_config_set(
store = here::here('_targets'),
script = here::here('_targets.R')
)
<- tar_read(data_observational)
observed
# Plotting functions
invisible(list2env(tar_read(graphic_functions), .GlobalEnv))
set_annotation_fonts()
<- tribble(
gof_map ~raw, ~clean, ~fmt, ~omit,
"nobs", "N", 0, FALSE,
"r.squared", "$R^2$", 2, FALSE,
"adj.r.squared", "\\(R^2\\) adjusted", 2, FALSE
)
Observational
DAG
<- tribble(
coords ~name, ~x, ~y, ~type, ~color, ~label,
"X", 1, 3, "Treatment", clrs$Prism[4], "Experiential\nphilanthropy",
"Y", 3, 3, "Outcome", clrs$Prism[2], "Charitable\nbehavior",
"Z1", 5/3, 4.2, "Confounder", clrs$Prism[6], "Social awareness",
"Z2", 7/3, 4.2, "Confounder", clrs$Prism[6], "Age",
"Z3", 5/3, 5, "Confounder", clrs$Prism[6], "GPA",
"Z4", 7/3, 5, "Confounder", clrs$Prism[6], "Income",
"M", 2, 2, "Mediator", clrs$Prism[3], "Community\nconnections",
"C", 2, 1, "Collider", clrs$Prism[8], "Employment in\nnonprofit sector"
|>
) mutate(type = fct_inorder(type))
<- dagify(
data_full_dag ~ X + Z1 + Z2 + Z3 + Z4 + M,
Y ~ Z1 + Z2 + Z3 + Z4,
X ~ X,
M ~ X + Y,
C coords = coords,
labels = pull(coords, label, name = name)
|>
) tidy_dagitty() |>
left_join(select(coords, name, type, color), by = join_by(name)) |>
mutate(arrow_color = case_when(
== "X" & to == "Y" ~ "black",
name == "C" ~ clrs$Prism[8],
to == "M" | (name == "M" & to == "Y") ~ clrs$Prism[3],
to .default = color
))
ggplot(data_full_dag, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_dag_edges(edge_width = 0.5) +
geom_dag_edges(aes(edge_color = arrow_color), edge_width = 0.5) +
geom_dag_point(aes(color = type), size = 12) +
geom_dag_label(
data = filter(data_full_dag, type != "Confounder"), aes(label = label),
size = 4, nudge_y = -0.46, color = "black", lineheight = 1
+
) geom_dag_label(
data = filter(data_full_dag, type == "Confounder"), aes(label = label),
size = 4, nudge_y = 0.35, color = "black", lineheight = 1
+
) scale_color_manual(
values = coords |> distinct(type, color) |> pull(color, name = type),
guide = guide_legend(
title = NULL, override.aes = list(size = 6), label.position = "bottom"
)+
) coord_cartesian(xlim = c(0.98, 3.02)) +
theme_np_dag() +
theme(legend.position = "top")
EDA
Confounders
ggplot(observed, aes(x = social_awareness, y = experiential_learning)) +
geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
pch = 19, color = "grey20", scale = 0.2
+
) geom_smooth(
method = "glm", method.args = list(family = binomial(link = "logit")),
se = FALSE
+
) scale_y_continuous(labels = label_percent()) +
labs(x = "Social awareness index", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(observed, aes(x = age, y = experiential_learning)) +
geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
pch = 19, color = "grey20", scale = 0.4
+
) geom_smooth(
method = "glm", method.args = list(family = binomial(link = "logit")),
se = FALSE
+
) scale_y_continuous(labels = label_percent()) +
labs(x = "Age", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(observed, aes(x = gpa, y = experiential_learning)) +
geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
pch = 19, color = "grey20", scale = 0.4
+
) geom_smooth(
method = "glm", method.args = list(family = binomial(link = "logit")),
se = FALSE
+
) scale_y_continuous(labels = label_percent()) +
labs(x = "GPA", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(observed, aes(x = income, y = experiential_learning)) +
geom_dots(aes(side = ifelse(experiential_learning == 1, "bottom", "top")),
pch = 19, color = "grey20", scale = 0.4
+
) geom_smooth(
method = "glm", method.args = list(family = binomial(link = "logit")),
se = FALSE
+
) scale_x_continuous(labels = label_dollar()) +
scale_y_continuous(labels = label_percent()) +
labs(x = "Income", y = "Experiential learning participation")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(observed, aes(x = social_awareness, y = prosocial_index)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(observed, aes(x = age, y = prosocial_index)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(observed, aes(x = gpa, y = prosocial_index)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(observed, aes(x = income, y = prosocial_index)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Mediator
|>
observed mutate(experiential_learning_fct = as.logical(experiential_learning)) |>
ggplot(aes(x = experiential_learning_fct, y = community_connections)) +
geom_point(position = position_jitter(height = 0)) +
geom_boxplot(aes(group = experiential_learning_fct))
ggplot(observed, aes(x = community_connections, y = prosocial_index)) +
geom_point() +
labs(x = "Community connections", y = "Prosocial index")
Collider
|>
observed mutate(experiential_learning_fct = as.logical(experiential_learning)) |>
mutate(employed_nonprofit_fct = as.logical(employed_nonprofit)) |>
ggplot(aes(x = experiential_learning_fct, y = employed_nonprofit_fct)) +
geom_point(position = position_jitter())
|>
observed mutate(employed_nonprofit_fct = as.logical(employed_nonprofit)) |>
ggplot(aes(x = prosocial_index, y = employed_nonprofit_fct)) +
geom_point(position = position_jitter(width = 0))
Model comparisons
<- lm(
model_naive ~ experiential_learning,
prosocial_index data = observed
)
<- lm(
model_adjusted_correct ~ experiential_learning + social_awareness + age + gpa + income,
prosocial_index data = observed
)
<- lm(
model_with_mediator ~ experiential_learning + social_awareness + age + gpa + income + community_connections,
prosocial_index data = observed
)
<- lm(
model_collider_selection ~ experiential_learning + social_awareness + age + gpa + income,
prosocial_index data = filter(observed, employed_nonprofit == 1)
)
<- lm(
model_collider_post_treatment ~ experiential_learning + social_awareness + age + gpa + income + employed_nonprofit,
prosocial_index data = observed
)
<- list(
models "Correctly adjusted" = model_adjusted_correct,
"Naive" = model_naive,
"Mediator included" = model_with_mediator,
"Collider (selection)" = model_collider_selection,
"Collider (post-treatment covariate)" = model_collider_post_treatment
)
modelsummary(
models,statistic = "[{conf.low}, {conf.high}]",
fmt = 2,
gof_map = gof_map,
add_columns = tibble("True effect" = c(NA, NA, 10)) |> magrittr::set_attr("position", 2)
|>
) style_tt(
i = 3:4, j = 1:6,
background = clrs$Prism[6], color = "#ffffff", bold = TRUE
)
True effect | Correctly adjusted | Naive | Mediator included | Collider (selection) | Collider (post-treatment covariate) | |
---|---|---|---|---|---|---|
(Intercept) | 33.42 | 82.48 | 30.86 | 67.65 | 40.71 | |
[29.04, 37.80] | [81.88, 83.08] | [26.67, 35.05] | [63.24, 72.06] | [36.66, 44.77] | ||
experiential_learning | 10.00 | 10.03 | 14.25 | 7.95 | 5.57 | 6.27 |
[9.33, 10.74] | [13.36, 15.15] | [7.18, 8.71] | [4.87, 6.27] | [5.48, 7.05] | ||
social_awareness | 0.36 | 0.36 | 0.17 | 0.29 | ||
[0.34, 0.38] | [0.33, 0.38] | [0.14, 0.19] | [0.27, 0.31] | |||
age | 0.44 | 0.45 | 0.23 | 0.36 | ||
[0.31, 0.56] | [0.33, 0.57] | [0.11, 0.34] | [0.24, 0.47] | |||
gpa | 2.66 | 2.59 | 0.79 | 2.40 | ||
[2.03, 3.29] | [1.99, 3.19] | [0.22, 1.37] | [1.83, 2.97] | |||
income | 0.00 | 0.00 | 0.00 | 0.00 | ||
[0.00, 0.00] | [0.00, 0.00] | [0.00, 0.00] | [0.00, 0.00] | |||
community_connections | 0.82 | |||||
[0.67, 0.96] | ||||||
employed_nonprofit | 6.91 | |||||
[6.07, 7.75] | ||||||
N | 1135 | 1135 | 1135 | 573 | 1135 | |
$R^2$ | 0.72 | 0.46 | 0.75 | 0.47 | 0.77 | |
\(R^2\) adjusted | 0.72 | 0.46 | 0.74 | 0.46 | 0.77 |
modelsummary(
models,statistic = "[{conf.low}, {conf.high}]",
fmt = 2,
coef_map = c("experiential_learning"),
gof_map = gof_map,
add_columns = tibble("True effect" = 10) |> magrittr::set_attr("position", 2)
)
True effect | Correctly adjusted | Naive | Mediator included | Collider (selection) | Collider (post-treatment covariate) | |
---|---|---|---|---|---|---|
experiential_learning | 10.00 | 10.03 | 14.25 | 7.95 | 5.57 | 6.27 |
[9.33, 10.74] | [13.36, 15.15] | [7.18, 8.71] | [4.87, 6.27] | [5.48, 7.05] | ||
N | 1135 | 1135 | 1135 | 573 | 1135 | |
$R^2$ | 0.72 | 0.46 | 0.75 | 0.47 | 0.77 | |
\(R^2\) adjusted | 0.72 | 0.46 | 0.74 | 0.46 | 0.77 |
<- enframe(models) |>
treatment_effects mutate(params = map(value, \(x) model_parameters(x))) |>
unnest(params) |>
filter(Parameter == "experiential_learning") |>
mutate(name = factor(name, levels = names(models)))
ggplot(treatment_effects, aes(x = Coefficient, y = fct_rev(name))) +
geom_vline(xintercept = 10, color = clrs$Prism[2]) +
geom_pointrange(aes(xmin = CI_low, xmax = CI_high)) +
labs(x = "Treatment effect", y = NULL) +
theme_np(base_size = 16) +
theme(panel.grid.major.y = element_blank())
Fancier estimation
<- weightit(
weights_ate ~ social_awareness + age + gpa + income,
experiential_learning data = observed,
method = "glm",
estimand = "ATE"
)
$wts_ate <- weights_ate$weights
observed
<- tibble(
plot_data_weights_ate propensity = weights_ate$ps,
weight = weights_ate$weights,
treatment = weights_ate$treat
)
ggplot() +
geom_histogram(data = filter(plot_data_weights_ate, treatment == 1),
bins = 50, aes(x = propensity, weight = weight, fill = "Treated pseudo-population")) +
geom_histogram(data = filter(plot_data_weights_ate, treatment == 0),
bins = 50, aes(x = propensity, weight = weight, y = -after_stat(count), fill = "Untreated psuedo-population")) +
geom_histogram(data = filter(plot_data_weights_ate, treatment == 1),
bins = 50, aes(x = propensity, fill = "Treated people")) +
geom_histogram(data = filter(plot_data_weights_ate, treatment == 0),
bins = 50, aes(x = propensity, y = -after_stat(count), fill = "Untreated people")) +
geom_hline(yintercept = 0, color = "white", linewidth = 0.25) +
scale_x_continuous(labels = scales::label_percent()) +
scale_y_continuous(label = abs) +
scale_fill_manual(
values = c(
$Prism[2], colorspace::lighten(clrs$Prism[2], 0.5),
clrs$Prism[8], colorspace::lighten(clrs$Prism[8], 0.65)),
clrsguide = guide_legend(reverse = FALSE, nrow = 2)) +
labs(x = "Propensity", y = "Count", fill = NULL) +
theme_np() +
theme(
panel.grid = element_blank(),
legend.position = "top",
legend.key.size = unit(0.65, "lines")
)
<- lm(
model_outcome_ate ~ experiential_learning,
prosocial_index data = observed,
weights = wts_ate
)
avg_comparisons(model_outcome_ate, variables = "experiential_learning")
##
## Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## 10 0.477 21 <0.001 323.1 9.08 11
##
## Term: experiential_learning
## Type: response
## Comparison: mean(1) - mean(0)
## Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted