library(tidyverse)
2 Process, clean, and reshape data
Popular conjoint survey platforms like Qualtrics and Sawtooth typically provide results data in two separate data files: (1) individual participant-level responses and (2) a bank of possible alternative-level combinations of features. To analyze the results of a conjoint experiment, the two datasets need to be joined.
2.1 Individual-level responses
# Load the participant-level survey responses (wide format: one row per
# respondent, with one CBC_Random* column per choice question), then print them.
responses <- readRDS(
  here::here("data", "processed_data", "responses_illustration.rds")
)
responses
# A tibble: 295 × 15
resp_id gender age CBC_Random1 CBC_Random2 CBC_Random3 CBC_Random4
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 Female 19 1 2 1 1
2 5 Female 19 2 1 2 2
3 6 Male 20 2 1 1 2
4 7 Female 20 2 2 1 2
5 8 Female 20 2 2 2 2
6 9 Male 20 1 2 1 1
7 10 Female 19 2 2 2 1
8 11 Male 22 2 1 1 1
9 12 Male 20 1 1 2 1
10 13 Female 19 2 2 2 2
# ℹ 285 more rows
# ℹ 8 more variables: CBC_Random5 <dbl>, CBC_Random6 <dbl>, CBC_Random7 <dbl>,
# CBC_Random8 <dbl>, CBC_Random9 <dbl>, CBC_Random10 <dbl>,
# CBC_Random11 <dbl>, CBC_Random12 <dbl>
2.2 Possible alternatives
# Load the bank of possible alternatives (one row per version-question-alt
# combination with its price, packaging, and flavor levels), then print it.
alternatives <- readRDS(
  here::here("data", "processed_data", "alternatives_illustration.rds")
)
alternatives
# A tibble: 7,200 × 6
version question alt price packaging flavor
<dbl> <dbl> <dbl> <fct> <fct> <fct>
1 1 1 1 $2 Plastic + paper Chocolate
2 1 1 2 $4 Plastic + sticker Nuts
3 1 2 1 $3 Plastic + sticker Nuts
4 1 2 2 $4 Plastic + paper Chocolate
5 1 3 1 $2 Plastic + paper Chocolate
6 1 3 2 $3 Plastic + sticker Nuts
7 1 4 1 $2 Plastic + paper Chocolate
8 1 4 2 $3 Plastic + paper Nuts
9 1 5 1 $4 Plastic + sticker Nuts
10 1 5 2 $4 Plastic + sticker Chocolate
# ℹ 7,190 more rows
# Count how many distinct design versions, questions, and alternatives
# appear in the alternatives bank.
alternatives |>
  summarize(
    versions = n_distinct(version),
    questions = n_distinct(question),
    alts = n_distinct(alt)
  )
# A tibble: 1 × 3
versions questions alts
<int> <int> <int>
1 300 12 2
2.3 Pivoting and expanding
The original responses data is wide, with a column for each of the 12 choices. We first need to make it long, with one row for each respondent-question combination.
# Reshape the wide responses into long format: one row per respondent and
# question, with the selected alternative stored in `chosen_alt`.
responses_long <- responses |>
  pivot_longer(
    cols = starts_with("CBC_Random"),
    names_to = "question_raw",
    values_to = "chosen_alt"
  ) |>
  # The task number is embedded in text, like "CBC_Random6"; this extracts it
  mutate(question = as.numeric(str_extract(question_raw, "\\d+"))) |>
  # The raw column name is no longer needed once the number is extracted
  select(-question_raw)
responses_long
# A tibble: 3,540 × 5
resp_id gender age chosen_alt question
<dbl> <chr> <dbl> <dbl> <dbl>
1 4 Female 19 1 1
2 4 Female 19 2 2
3 4 Female 19 1 3
4 4 Female 19 1 4
5 4 Female 19 1 5
6 4 Female 19 1 6
7 4 Female 19 2 7
8 4 Female 19 1 8
9 4 Female 19 1 9
10 4 Female 19 1 10
# ℹ 3,530 more rows
# Sanity check: count distinct respondents and questions in the long data.
responses_long |>
  summarize(
    resp_ids = n_distinct(resp_id),
    questions = n_distinct(question)
  )
# A tibble: 1 × 2
resp_ids questions
<int> <int>
1 295 12
# Expand to one row per respondent, question, AND alternative (1 or 2), then
# re-attach the respondent-level columns and the chosen alternative, which are
# repeated across both alternative rows of each question.
responses_long_expanded <- responses_long |>
  expand(resp_id, question, alt = 1:2) |>
  left_join(responses_long, by = join_by(resp_id, question))
responses_long_expanded
# A tibble: 7,080 × 6
resp_id question alt gender age chosen_alt
<dbl> <dbl> <int> <chr> <dbl> <dbl>
1 4 1 1 Female 19 1
2 4 1 2 Female 19 1
3 4 2 1 Female 19 2
4 4 2 2 Female 19 2
5 4 3 1 Female 19 1
6 4 3 2 Female 19 1
7 4 4 1 Female 19 1
8 4 4 2 Female 19 1
9 4 5 1 Female 19 1
10 4 5 2 Female 19 1
# ℹ 7,070 more rows
# Sanity check: count distinct respondents, questions, and alternatives in the
# expanded data.
responses_long_expanded |>
  summarize(
    resp_ids = n_distinct(resp_id),
    questions = n_distinct(question),
    alts = n_distinct(alt)
  )
# A tibble: 1 × 3
resp_ids questions alts
<int> <int> <int>
1 295 12 2
2.4 Final data
# Attach the attribute levels shown for each alternative, then flag the
# alternative the respondent actually picked.
# NOTE(review): the join matches each respondent to the design version whose
# number equals their resp_id -- confirm that resp_id and version correspond.
combined <- responses_long_expanded |>
  left_join(alternatives, by = join_by(resp_id == version, question, alt)) |>
  # choice is 1 for the chosen alternative and 0 for the other one
  mutate(choice = as.numeric(alt == chosen_alt))
# Rows / 2 alternatives / 12 questions gives the number of respondents
nrow(combined) / 2 / 12
[1] 295