3  Causal effects

Marginal means (MMs) and average marginal component effects (AMCEs)

3.1 Marginal means, differences in marginal means, and regression coefficients

At its core, a “marginal mean” refers to the literal mean in the margins in a contingency table of model predictions based on a balanced grid of predictors.

To illustrate, we’ll make a model that predicts penguin weight based on species and sex and then make predictions for a balanced grid of covariates (i.e. male Adelie, female Adelie, male Chinstrap, female Chinstrap, male Gentoo, female Gentoo):

Code
library(tidyverse)
library(marginaleffects)
Code
penguins <- penguins |> drop_na(sex)

model <- lm(body_mass ~ species + sex, data = penguins)

preds <- model |> 
  predictions(datagrid(species = unique, sex = unique))
Code
preds |>
  select(estimate, species, sex) |>
  pivot_wider(names_from = sex, values_from = estimate) |>
  select(Species = species, Female = female, Male = male) |>
  tt(
    notes = "Predicted penguin weights (g) from model `lm(body_mass ~ species + sex)`"
  ) |>
  group_tt(
    j = list("Sex" = 2:3)
  ) |>
  format_tt("notes", markdown = TRUE) |>
  style_tt(j = 2:3, align = "c")
Sex
Species Female Male
Predicted penguin weights (g) from model lm(body_mass ~ species + sex)
Adelie 3372 4040
Gentoo 4750 5418
Chinstrap 3399 4067

We can add a column and row in the margins to show the species-specific and sex-specific average predicted weights. For the sex-specific averages, all the between-species variation is “marginalized out” or accounted for. For the species-specific averages, all the between-sex variation is similarly marginalized out:

Code
mean_sex <- preds |>
  group_by(sex) |>
  mutate(avg_estimate = mean(estimate)) |>
  select(species, sex, estimate, avg_estimate) |>
  pivot_wider(names_from = species, values_from = estimate) |>
  mutate(
    avg_explanation = glue::glue(
      "{round(avg_estimate, 0)}<br>",
      "<span style='font-size:70%'>",
      "({round(Adelie, 0)} + {round(Chinstrap, 0)} + {round(Gentoo, 0)}) / 3",
      "</span>"
    )
  ) |>
  select(sex, avg_explanation) |>
  mutate(species = "Marginal mean") |>
  pivot_wider(names_from = sex, values_from = avg_explanation)

mean_species <- preds |>
  group_by(species) |>
  summarize(avg_estimate = mean(estimate))

all_combined <- preds |>
  ungroup() |>
  select(estimate, species, sex) |>
  pivot_wider(names_from = sex, values_from = estimate) |>
  left_join(mean_species, by = join_by(species)) |>
  arrange(species) |>
  mutate(
    avg_explanation = glue::glue(
      "{round(avg_estimate, 0)}<br>",
      "<span style='font-size:70%'>",
      "({round(female, 0)} + {round(male, 0)}) / 2",
      "</span>"
    )
  ) |>
  mutate(across(c(female, male), ~ as.character(round(., 0)))) |>
  select(-avg_estimate) |>
  bind_rows(mean_sex)

all_combined |>
  select(
    Species = species,
    Female = female,
    Male = male,
    `Marginal mean` = avg_explanation
  ) |>
  tt(
    notes = "Predicted penguin weights (g) from model `lm(body_mass ~ species + sex)`"
  ) |>
  group_tt(
    j = list("Sex" = 2:3)
  ) |>
  format_tt(replace = "") |>
  format_tt("notes", markdown = TRUE) |>
  style_tt(j = 2:4, align = "c") |>
  style_tt(i = 4, line = "t") |>
  theme_html(i = 4, css = "border-top-style: dashed;") |>
  style_tt(i = 1:4, j = 3, line = "r") |>
  theme_html(j = 3, css = "border-right-style: dashed;")
Sex
Species Female Male Marginal mean
Predicted penguin weights (g) from model lm(body_mass ~ species + sex)
Adelie 3372 4040 3706
(3372 + 4040) / 2
Chinstrap 3399 4067 3733
(3399 + 4067) / 2
Gentoo 4750 5418 5084
(4750 + 5418) / 2
Marginal mean 3841
(3372 + 3399 + 4750) / 3
4508
(4040 + 4067 + 5418) / 3

Because regression is just fancy averages, the differences in marginal means here are actually identical to the coefficients in regression model. For instance, here are the results from the model:

Code
model |> broom::tidy()
## # A tibble: 4 × 5
##   term             estimate std.error statistic   p.value
##   <chr>               <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)        3372.       31.4   107.    4.34e-258
## 2 speciesChinstrap     26.9      46.5     0.579 5.63e-  1
## 3 speciesGentoo      1378.       39.1    35.2   1.05e-113
## 4 sexmale             668.       34.7    19.2   8.73e- 56

The sexmale coefficient of 667.56 represents the effect of moving from male to female when species is held constant.

The marginal means table shows the same thing. Marginalizing over species (i.e. holding species constant), the average predicted weight for female penguins is 3840.6 and for male penguins is 4508.2. The difference in those marginal means is 667.56—identical to the sexmale coefficient!

Code
amce_species <- preds |>
  group_by(species) |>
  summarize(avg_estimate = mean(estimate)) |>
  mutate(amce = avg_estimate - avg_estimate[1]) |>
  mutate(
    amce_nice = glue::glue(
      "{round(amce, 0)}<br>",
      "<span style='font-size:70%'>",
      "{species} − {species[1]}, or<br>",
      "{round(avg_estimate, 0)} − {round(avg_estimate[1], 0)}",
      "</span>"
    )
  ) |>
  select(species, amce_nice)

amce_sex <- preds |>
  group_by(sex) |>
  summarize(avg_estimate = mean(estimate)) |>
  mutate(amce = avg_estimate - avg_estimate[1]) |>
  mutate(
    amce_nice = glue::glue(
      "{round(amce, 0)}<br>",
      "<span style='font-size:70%'>",
      "{sex} − {sex[1]}, or<br>",
      "{round(avg_estimate, 0)} − {round(avg_estimate[1], 0)}",
      "</span>"
    )
  ) |>
  select(sex, amce_nice) |>
  mutate(species = "∆ in marginal mean") |>
  pivot_wider(names_from = sex, values_from = amce_nice)
Code
all_combined |>
  left_join(amce_species, by = join_by(species)) |>
  bind_rows(amce_sex) |>
  select(
    Species = species,
    Female = female,
    Male = male,
    `Marginal mean` = avg_explanation,
    `∆ in marginal mean` = amce_nice
  ) |>
  tt(
    notes = "Predicted penguin weights (g) from model `lm(body_mass ~ species + sex)`"
  ) |>
  group_tt(
    j = list("Sex" = 2:3)
  ) |>
  format_tt(replace = "") |>
  format_tt("notes", markdown = TRUE) |>
  style_tt(j = 2:5, align = "c") |>
  style_tt(i = 4, line = "t") |>
  theme_html(i = 4, css = "border-top-style: dashed;") |>
  style_tt(i = 1:5, j = 3, line = "r") |>
  theme_html(j = 3, css = "border-right-style: dashed;")
Sex
Species Female Male Marginal mean ∆ in marginal mean
Predicted penguin weights (g) from model lm(body_mass ~ species + sex)
Adelie 3372 4040 3706
(3372 + 4040) / 2
0
Adelie − Adelie, or
3706 − 3706
Chinstrap 3399 4067 3733
(3399 + 4067) / 2
27
Chinstrap − Adelie, or
3733 − 3706
Gentoo 4750 5418 5084
(4750 + 5418) / 2
1378
Gentoo − Adelie, or
5084 − 3706
Marginal mean 3841
(3372 + 3399 + 4750) / 3
4508
(4040 + 4067 + 5418) / 3
∆ in marginal mean 0
female − female, or
3841 − 3841
668
male − female, or
4508 − 3841

Differences in marginal means are equivalent to marginal effects or regression coefficients.

3.1.1 Faster way

These manual calculations are a pain though. We can use {marginaleffects} to do it more directly:

Code
avg_predictions(model, 
  newdata = datagrid(species = unique, sex = unique),
  by = c("sex", "species")
)
## 
##     sex   species Estimate Std. Error     z Pr(>|z|)   S 2.5 % 97.5 %
##  female Adelie        3372       31.4 107.3   <0.001 Inf  3311   3434
##  female Chinstrap     3399       42.1  80.7   <0.001 Inf  3317   3482
##  female Gentoo        4750       34.0 139.5   <0.001 Inf  4684   4817
##  male   Adelie        4040       31.4 128.5   <0.001 Inf  3978   4102
##  male   Chinstrap     4067       42.1  96.5   <0.001 Inf  3984   4149
##  male   Gentoo        5418       33.6 161.3   <0.001 Inf  5352   5484
## 
## Type: response

avg_comparisons(model, 
  newdata = datagrid(species = unique, sex = unique)
)
## 
##     Term           Contrast Estimate Std. Error      z Pr(>|z|)     S  2.5 % 97.5 %
##  sex     male - female         667.6       34.7 19.236   <0.001 271.5  599.5    736
##  species Chinstrap - Adelie     26.9       46.5  0.579    0.562   0.8  -64.2    118
##  species Gentoo - Adelie      1377.9       39.1 35.236   <0.001 901.1 1301.2   1455
## 
## Type: response