Code
library(tidyverse)
library(marginaleffects)
Marginal means (MMs) and average marginal component effects (AMCEs)
At its core, a “marginal mean” refers to the literal mean in the margins in a contingency table of model predictions based on a balanced grid of predictors.
To illustrate, we’ll make a model that predicts penguin weight based on species and sex and then make predictions for a balanced grid of covariates (i.e. male Adelie, female Adelie, male Chinstrap, female Chinstrap, male Gentoo, female Gentoo):
library(tidyverse)
library(marginaleffects)
<- penguins |> drop_na(sex)
penguins
<- lm(body_mass ~ species + sex, data = penguins)
model
<- model |>
preds predictions(datagrid(species = unique, sex = unique))
|>
preds select(estimate, species, sex) |>
pivot_wider(names_from = sex, values_from = estimate) |>
select(Species = species, Female = female, Male = male) |>
tt(
notes = "Predicted penguin weights (g) from model `lm(body_mass ~ species + sex)`"
|>
) group_tt(
j = list("Sex" = 2:3)
|>
) format_tt("notes", markdown = TRUE) |>
style_tt(j = 2:3, align = "c")
Sex | ||
---|---|---|
Species | Female | Male |
Predicted penguin weights (g) from model lm(body_mass ~ species + sex) | ||
Adelie | 3372 | 4040 |
Gentoo | 4750 | 5418 |
Chinstrap | 3399 | 4067 |
We can add a column and row in the margins to show the species-specific and sex-specific average predicted weights. For the sex-specific averages, all the between-species variation is “marginalized out” or accounted for. For the species-specific averages, all the between-sex variation is similarly marginalized out:
<- preds |>
mean_sex group_by(sex) |>
mutate(avg_estimate = mean(estimate)) |>
select(species, sex, estimate, avg_estimate) |>
pivot_wider(names_from = species, values_from = estimate) |>
mutate(
avg_explanation = glue::glue(
"{round(avg_estimate, 0)}<br>",
"<span style='font-size:70%'>",
"({round(Adelie, 0)} + {round(Chinstrap, 0)} + {round(Gentoo, 0)}) / 3",
"</span>"
)|>
) select(sex, avg_explanation) |>
mutate(species = "Marginal mean") |>
pivot_wider(names_from = sex, values_from = avg_explanation)
<- preds |>
mean_species group_by(species) |>
summarize(avg_estimate = mean(estimate))
<- preds |>
all_combined ungroup() |>
select(estimate, species, sex) |>
pivot_wider(names_from = sex, values_from = estimate) |>
left_join(mean_species, by = join_by(species)) |>
arrange(species) |>
mutate(
avg_explanation = glue::glue(
"{round(avg_estimate, 0)}<br>",
"<span style='font-size:70%'>",
"({round(female, 0)} + {round(male, 0)}) / 2",
"</span>"
)|>
) mutate(across(c(female, male), ~ as.character(round(., 0)))) |>
select(-avg_estimate) |>
bind_rows(mean_sex)
|>
all_combined select(
Species = species,
Female = female,
Male = male,
`Marginal mean` = avg_explanation
|>
) tt(
notes = "Predicted penguin weights (g) from model `lm(body_mass ~ species + sex)`"
|>
) group_tt(
j = list("Sex" = 2:3)
|>
) format_tt(replace = "") |>
format_tt("notes", markdown = TRUE) |>
style_tt(j = 2:4, align = "c") |>
style_tt(i = 4, line = "t") |>
theme_html(i = 4, css = "border-top-style: dashed;") |>
style_tt(i = 1:4, j = 3, line = "r") |>
theme_html(j = 3, css = "border-right-style: dashed;")
Sex | |||
---|---|---|---|
Species | Female | Male | Marginal mean |
Predicted penguin weights (g) from model lm(body_mass ~ species + sex) | |||
Adelie | 3372 | 4040 | 3706 (3372 + 4040) / 2 |
Chinstrap | 3399 | 4067 | 3733 (3399 + 4067) / 2 |
Gentoo | 4750 | 5418 | 5084 (4750 + 5418) / 2 |
Marginal mean | 3841 (3372 + 3399 + 4750) / 3 |
4508 (4040 + 4067 + 5418) / 3 |
Because regression is just fancy averages, the differences in marginal means here are actually identical to the coefficients in regression model. For instance, here are the results from the model:
|> broom::tidy()
model ## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3372. 31.4 107. 4.34e-258
## 2 speciesChinstrap 26.9 46.5 0.579 5.63e- 1
## 3 speciesGentoo 1378. 39.1 35.2 1.05e-113
## 4 sexmale 668. 34.7 19.2 8.73e- 56
The sexmale
coefficient of 667.56 represents the effect of moving from male to female when species is held constant.
The marginal means table shows the same thing. Marginalizing over species (i.e. holding species constant), the average predicted weight for female penguins is 3840.6 and for male penguins is 4508.2. The difference in those marginal means is 667.56—identical to the sexmale
coefficient!
<- preds |>
amce_species group_by(species) |>
summarize(avg_estimate = mean(estimate)) |>
mutate(amce = avg_estimate - avg_estimate[1]) |>
mutate(
amce_nice = glue::glue(
"{round(amce, 0)}<br>",
"<span style='font-size:70%'>",
"{species} − {species[1]}, or<br>",
"{round(avg_estimate, 0)} − {round(avg_estimate[1], 0)}",
"</span>"
)|>
) select(species, amce_nice)
<- preds |>
amce_sex group_by(sex) |>
summarize(avg_estimate = mean(estimate)) |>
mutate(amce = avg_estimate - avg_estimate[1]) |>
mutate(
amce_nice = glue::glue(
"{round(amce, 0)}<br>",
"<span style='font-size:70%'>",
"{sex} − {sex[1]}, or<br>",
"{round(avg_estimate, 0)} − {round(avg_estimate[1], 0)}",
"</span>"
)|>
) select(sex, amce_nice) |>
mutate(species = "∆ in marginal mean") |>
pivot_wider(names_from = sex, values_from = amce_nice)
|>
all_combined left_join(amce_species, by = join_by(species)) |>
bind_rows(amce_sex) |>
select(
Species = species,
Female = female,
Male = male,
`Marginal mean` = avg_explanation,
`∆ in marginal mean` = amce_nice
|>
) tt(
notes = "Predicted penguin weights (g) from model `lm(body_mass ~ species + sex)`"
|>
) group_tt(
j = list("Sex" = 2:3)
|>
) format_tt(replace = "") |>
format_tt("notes", markdown = TRUE) |>
style_tt(j = 2:5, align = "c") |>
style_tt(i = 4, line = "t") |>
theme_html(i = 4, css = "border-top-style: dashed;") |>
style_tt(i = 1:5, j = 3, line = "r") |>
theme_html(j = 3, css = "border-right-style: dashed;")
Sex | ||||
---|---|---|---|---|
Species | Female | Male | Marginal mean | ∆ in marginal mean |
Predicted penguin weights (g) from model lm(body_mass ~ species + sex) | ||||
Adelie | 3372 | 4040 | 3706 (3372 + 4040) / 2 |
0 Adelie − Adelie, or 3706 − 3706 |
Chinstrap | 3399 | 4067 | 3733 (3399 + 4067) / 2 |
27 Chinstrap − Adelie, or 3733 − 3706 |
Gentoo | 4750 | 5418 | 5084 (4750 + 5418) / 2 |
1378 Gentoo − Adelie, or 5084 − 3706 |
Marginal mean | 3841 (3372 + 3399 + 4750) / 3 |
4508 (4040 + 4067 + 5418) / 3 |
||
∆ in marginal mean | 0 female − female, or 3841 − 3841 |
668 male − female, or 4508 − 3841 |
Differences in marginal means are equivalent to marginal effects or regression coefficients.
These manual calculations are a pain though. We can use {marginaleffects} to do it more directly:
avg_predictions(model,
newdata = datagrid(species = unique, sex = unique),
by = c("sex", "species")
)##
## sex species Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## female Adelie 3372 31.4 107.3 <0.001 Inf 3311 3434
## female Chinstrap 3399 42.1 80.7 <0.001 Inf 3317 3482
## female Gentoo 4750 34.0 139.5 <0.001 Inf 4684 4817
## male Adelie 4040 31.4 128.5 <0.001 Inf 3978 4102
## male Chinstrap 4067 42.1 96.5 <0.001 Inf 3984 4149
## male Gentoo 5418 33.6 161.3 <0.001 Inf 5352 5484
##
## Type: response
avg_comparisons(model,
newdata = datagrid(species = unique, sex = unique)
)##
## Term Contrast Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## sex male - female 667.6 34.7 19.236 <0.001 271.5 599.5 736
## species Chinstrap - Adelie 26.9 46.5 0.579 0.562 0.8 -64.2 118
## species Gentoo - Adelie 1377.9 39.1 35.236 <0.001 901.1 1301.2 1455
##
## Type: response