Load and clean data

library(tidyverse)
library(janitor)
library(here)

set.seed(1234)
# Constraints -------------------------------------------------------------

constraint_levels <- function(x) {
  filter(constraints, constraint == x) %>% 
    pull(levels_clean) %>%
    .[[1]]
}

constraints <- tribble(
  ~constraint, ~constraint_clean, ~levels_clean,
  "create_network", "Network creation", list("Network creation" = "T", "No network creation" = "F"),
  "select", "Selection", list("Selection" = "T", "No selection" = "F"),
  "disperse", "Dispersal", list("Dispersal" = "T", "No dispersal" = "F"),
  "compete", "Competition", list("Competition" = "T", "No competition" = "F"),
  "selectfor_d", "Selection for D", list("Selection" = "T", "No selection" = "F"),
  "catastrophe", "Catastrophe", list("Catastrophe" = "T", "No catastrophe" = "F")
)

saveRDS(constraints, here("data", "derived_data", "constraints.rds"))


# High/low ----------------------------------------------------------------

BHL <- read_rds(here("data", "raw_data", "BHL.rds")) %>% 
  clean_names(case = "snake") %>%   # Get rid of invalid characters in column names
  rename_all(list(~str_remove_all(., "yn$"))) %>%  # Get rid of "yn" in column names
  # Clean up constraint values
  mutate(create_network = fct_recode(create_network, !!!constraint_levels("create_network")),
         select = fct_recode(select, !!!constraint_levels("select")),
         disperse = fct_recode(disperse, !!!constraint_levels("disperse")),
         compete = fct_recode(compete, !!!constraint_levels("compete")),
         selectfor_d = fct_recode(selectfor_d, !!!constraint_levels("selectfor_d")),
         catastrophe = fct_recode(catastrophe, !!!constraint_levels("catastrophe"))) %>% 
  mutate_at(vars(one_of(constraints$constraint)), list(~ fct_inorder(.))) %>% 
  mutate(repp = factor(repp, levels = c(1, 2), labels = c("High", "Low"), ordered = TRUE))

BHL_small <- BHL %>% 
  group_by(repp) %>% 
  sample_frac(size = 0.1) %>% 
  ungroup()

sim_hl <- BHL_small %>% 
  # Make new columns with "_constraint" suffix that show if constraint is T/F
  # instead of using the label. e.g. "Selection" becomes TRUE, "No selection"
  # becomes FALSE
  mutate_at(vars(one_of(constraints$constraint)),
            list(constraint = ~as.logical(-as.integer(.) + 2L))) %>%
  # Count how many of the constraints are TRUE in each row
  mutate(n_constraints = reduce(select(., ends_with("_constraint")), `+`)) %>%
  # Make a factor version of the constraint count for plotting
  mutate(n_constraints_f = as.factor(n_constraints)) %>% 
  mutate(turnover_diff = landscape_linked_species_mean_turnover - landscape_unlinked_species_mean_turnover)

saveRDS(BHL, here("data", "derived_data", "BHL.rds"))
saveRDS(BHL_small, here("data", "derived_data", "BHL_small.rds"))
saveRDS(sim_hl, here("data", "derived_data", "sim_hl.rds"))


# Latin squares -----------------------------------------------------------

BLS <- read_rds(here("data", "raw_data", "BLS.rds")) %>% 
  clean_names(case = "snake") %>%   # Get rid of invalid characters in column names
  rename_all(list(~str_remove_all(., "yn$"))) %>%  # Get rid of "yn" in column names
  # Clean up constraint values
  mutate(create_network = fct_recode(create_network, !!!constraint_levels("create_network")),
         select = fct_recode(select, !!!constraint_levels("select")),
         disperse = fct_recode(disperse, !!!constraint_levels("disperse")),
         compete = fct_recode(compete, !!!constraint_levels("compete")),
         selectfor_d = fct_recode(selectfor_d, !!!constraint_levels("selectfor_d")),
         catastrophe = fct_recode(catastrophe, !!!constraint_levels("catastrophe"))) %>% 
  mutate_at(vars(one_of(constraints$constraint)), list(~fct_inorder(.)))

BLS_small <- BLS %>% 
  sample_frac(size = 0.1)

sim_ls <- BLS_small %>%
  # Make new columns with "_constraint" suffix that show if constraint is T/F
  # instead of using the label. e.g. "Selection" becomes TRUE, "No selection"
  # becomes FALSE
  mutate_at(vars(one_of(constraints$constraint)),
            list(constraint = ~as.logical(-as.integer(.) + 2L))) %>%
  # Count how many of the constraints are TRUE in each row
  mutate(n_constraints = reduce(select(., ends_with("_constraint")), `+`)) %>%
  # Make a factor version of the constraint count for plotting
  mutate(n_constraints_f = as.factor(n_constraints)) %>% 
  mutate(turnover_diff = landscape_linked_species_mean_turnover - landscape_unlinked_species_mean_turnover)

saveRDS(BLS, here("data", "derived_data", "BLS.rds"))
saveRDS(BLS_small, here("data", "derived_data", "BLS_small.rds"))
saveRDS(sim_ls, here("data", "derived_data", "sim_ls.rds"))

Create columns for every combination of constraint in the data (e.g. select, select & disperse, select & disperse & create_network, etc.)

(By the inimitable Vincent Arel-Bundock)

make_combinations <- function(df, m = 5) {
  com <- colnames(df)[2:ncol(df)] %>%
    combn(m) %>%
    as_tibble()
  out <- com %>%
    map(~ df[.]) %>%
    map(~ rowSums(.) == ncol(.)) %>%
    setNames(map(com, paste, collapse = " + ")) %>%
    as_tibble()
  return(out)
}

outcomes <- c("landscape_fitness_linked", "avg_evenness_t", "var_fitness", 
              "landscape_richness_mean", "landscape_linked_species_mean_turnover",
              "turnover_diff")

# High/low ----------------------------------------------------------------

# Select just the run number and *_constraint TRUE/FALSE columns
constraint_combinations_hl <- sim_hl %>%
  select(runnum, ends_with("_constraint")) %>%
  # Shrink names by removing "_constraint"
  rename_at(vars(ends_with("constraint")),
            list(~str_replace_all(., "_constraint", "")))

# Find all combinations of variables (m = number of items in combination; m = 2
# means pairs, m = 3 means triplets, etc.)
all_constraint_combos_hl <- map(2:6, ~make_combinations(constraint_combinations_hl, m = .)) %>%
  bind_cols(constraint_combinations_hl, .)

# Select the outcome variables we care about and join the constraint combinations
constraint_combo_outcomes_hl <- sim_hl %>%
  select(runnum, repp, n_constraints, one_of(outcomes)) %>%
  right_join(all_constraint_combos_hl, by = "runnum")

# Don't double count rows. If a row has two constraints like select and
# disperse, it'll also have select + disperse set to TRUE. If that's the case,
# we don't want to include it in just select or just disperse
constraint_combo_outcomes_hl_nested <- constraint_combo_outcomes_hl %>%
  select(-n_constraints) %>%
  # Make long
  gather(constraint_combo, value, -runnum, -repp, -one_of(outcomes)) %>%
  # Count how many constraints there are within each row based on + signs
  mutate(n = str_count(constraint_combo, "\\+") + 1) %>%
  # Only keep rows where the constraint is turned on
  filter(value == TRUE) %>%
  # Nest all the constraint combinations within each row
  group_by(runnum) %>%
  nest()

# Only keep the values where n == max(n) for that row
# This takes ≈2 minutes
constraint_combo_outcomes_hl_filtered <- constraint_combo_outcomes_hl_nested %>%
  mutate(filtered = data %>% map(~filter(., n == max(.$n)))) %>%
  select(-data) %>%
  unnest(filtered)

# This omitted all the rows where n_constraints == 0, so add those back in
no_constraints_hl <- constraint_combo_outcomes_hl %>%
  filter(n_constraints == 0) %>%
  mutate(constraint_combo = "No constraints", n = 0) %>%
  select(runnum, repp, one_of(outcomes), constraint_combo, n)

constraint_combo_outcomes_hl_done <- bind_rows(constraint_combo_outcomes_hl_filtered,
                                               no_constraints_hl) %>%
  select(-value)

saveRDS(constraint_combo_outcomes_hl_done, 
        here("data", "derived_data", "constraint_combo_outcomes_hl.rds"))


# Latin squares -----------------------------------------------------------

# Select just the run number and *_constraint TRUE/FALSE columns
constraint_combinations_ls <- sim_ls %>%
  select(runnum, ends_with("_constraint")) %>% 
  # Shrink names by removing "_constraint"
  rename_at(vars(ends_with("constraint")), 
            list(~str_replace_all(., "_constraint", "")))

# Find all combinations of variables (m = number of items in combination; m = 2
# means pairs, m = 3 means triplets, etc.)
all_constraint_combos_ls <- map(2:6, ~make_combinations(constraint_combinations_ls, m = .)) %>% 
  bind_cols(constraint_combinations_ls, .)

# Select the outcome variables we care about and join the constraint combinations
constraint_combo_outcomes_ls <- sim_ls %>%
  select(runnum, n_constraints, one_of(outcomes)) %>% 
  right_join(all_constraint_combos_ls, by = "runnum")

# Don't double count rows. If a row has two constraints like select and
# disperse, it'll also have select + disperse set to TRUE. If that's the case,
# we don't want to include it in just select or just disperse
constraint_combo_outcomes_ls_nested <- constraint_combo_outcomes_ls %>% 
  select(-n_constraints) %>% 
  # Make long
  gather(constraint_combo, value, -runnum, -one_of(outcomes)) %>% 
  # Count how many constraints there are within each row based on + signs
  mutate(n = str_count(constraint_combo, "\\+") + 1) %>%
  # Only keep rows where the constraint is turned on
  filter(value == TRUE) %>%
  # Nest all the constraint combinations within each row
  group_by(runnum) %>% 
  nest()

# Only keep the values where n == max(n) for that row
# This takes ≈1 minute
constraint_combo_outcomes_ls_filtered <- constraint_combo_outcomes_ls_nested %>% 
  mutate(filtered = data %>% map(~filter(., n == max(.$n)))) %>% 
  select(-data) %>% 
  unnest(filtered)

# This omitted all the rows where n_constraints == 0, so add those back in
no_constraints_ls <- constraint_combo_outcomes_ls %>% 
  filter(n_constraints == 0) %>% 
  mutate(constraint_combo = "No constraints", n = 0) %>% 
  select(runnum, one_of(outcomes), constraint_combo, n)

constraint_combo_outcomes_ls_done <- bind_rows(constraint_combo_outcomes_ls_filtered,
                                               no_constraints_ls) %>% 
  select(-value)

saveRDS(constraint_combo_outcomes_ls_done, 
        here("data", "derived_data", "constraint_combo_outcomes_ls.rds"))

Data details:

  • High-low full: 1,280,000 rows
    • High: 640,000 rows
    • Low: 640,000 rows
  • High-low small: 128,000 rows
    • High: 64,000 rows
    • Low: 64,000 rows
  • Latin squares full: 640,000 rows
  • Latin squares small: 64,000 rows


Original computing environment

writeLines(readLines(file.path(Sys.getenv("HOME"), ".R/Makevars")))
## # http://dirk.eddelbuettel.com/blog/2017/11/27/#011_faster_package_installation_one
## # VER=
## # CCACHE=ccache
## # CC=$(CCACHE) gcc$(VER)
## # CXX=$(CCACHE) g++$(VER)
## # CXXFLAGS=-O3 #-Wno-unused-variable -Wno-unused-function -Wno-unused-local-typedefs
## # CXX11=$(CCACHE) g++$(VER)
## # CXX14=$(CCACHE) g++$(VER)
## # FC=$(CCACHE) gfortran$(VER)
## # F77=$(CCACHE) gfortran$(VER)
## 
## # clang: start
## CFLAGS=-isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk
## CCFLAGS=-isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk
## CXXFLAGS=-isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk
## CPPFLAGS=-isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include
## 
## SHLIB_CXXLDFLAGS+=-Wl,-rpath,${R_HOME}/lib ${R_HOME}/lib/libc++abi.1.dylib
## SHLIB_CXX14LDFLAGS+=-Wl,-rpath,${R_HOME}/lib ${R_HOME}/lib/libc++abi.1.dylib
## # clang: end
## 
## CXX14FLAGS=-O3 -march=native -mtune=native
## CXX14FLAGS += -arch x86_64 -ftemplate-depth-256
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.3 (2020-02-29)
##  os       macOS Catalina 10.15.4      
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2020-04-23                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version    date       lib source                            
##  assertthat    0.2.1      2019-03-21 [1] CRAN (R 3.6.0)                    
##  backports     1.1.6      2020-04-05 [1] CRAN (R 3.6.2)                    
##  base64enc     0.1-3      2015-07-28 [1] CRAN (R 3.6.0)                    
##  broom         0.5.3.9000 2020-04-01 [1] Github (tidymodels/broom@3c922d5) 
##  callr         3.4.3      2020-03-28 [1] CRAN (R 3.6.2)                    
##  cellranger    1.1.0      2016-07-27 [1] CRAN (R 3.6.0)                    
##  cli           2.0.2      2020-02-28 [1] CRAN (R 3.6.0)                    
##  colorspace    1.4-1      2019-03-18 [1] CRAN (R 3.6.0)                    
##  crayon        1.3.4      2017-09-16 [1] CRAN (R 3.6.0)                    
##  DBI           1.1.0      2019-12-15 [1] CRAN (R 3.6.0)                    
##  dbplyr        1.4.2      2019-06-17 [1] CRAN (R 3.6.0)                    
##  desc          1.2.0      2018-05-01 [1] CRAN (R 3.6.0)                    
##  devtools      2.2.2      2020-02-17 [1] CRAN (R 3.6.0)                    
##  digest        0.6.25     2020-02-23 [1] CRAN (R 3.6.0)                    
##  dplyr       * 0.8.5      2020-03-07 [1] CRAN (R 3.6.0)                    
##  ellipsis      0.3.0      2019-09-20 [1] CRAN (R 3.6.0)                    
##  evaluate      0.14       2019-05-28 [1] CRAN (R 3.6.0)                    
##  fansi         0.4.1      2020-01-08 [1] CRAN (R 3.6.0)                    
##  forcats     * 0.5.0      2020-03-01 [1] CRAN (R 3.6.0)                    
##  fs            1.3.2      2020-03-05 [1] CRAN (R 3.6.0)                    
##  generics      0.0.2      2018-11-29 [1] CRAN (R 3.6.0)                    
##  ggplot2     * 3.3.0.9000 2020-04-23 [1] Github (tidyverse/ggplot2@d3d47be)
##  glue          1.4.0      2020-04-03 [1] CRAN (R 3.6.2)                    
##  gtable        0.3.0      2019-03-25 [1] CRAN (R 3.6.0)                    
##  haven         2.2.0      2019-11-08 [1] CRAN (R 3.6.0)                    
##  here        * 0.1        2017-05-28 [1] CRAN (R 3.6.0)                    
##  hms           0.5.3      2020-01-08 [1] CRAN (R 3.6.0)                    
##  htmltools     0.4.0      2019-10-04 [1] CRAN (R 3.6.0)                    
##  httr          1.4.1      2019-08-05 [1] CRAN (R 3.6.0)                    
##  janitor     * 2.0.1      2020-04-12 [1] CRAN (R 3.6.2)                    
##  jsonlite      1.6.1      2020-02-02 [1] CRAN (R 3.6.0)                    
##  knitr         1.28       2020-02-06 [1] CRAN (R 3.6.0)                    
##  lifecycle     0.2.0      2020-03-06 [1] CRAN (R 3.6.0)                    
##  lubridate     1.7.4      2018-04-11 [1] CRAN (R 3.6.0)                    
##  magrittr      1.5        2014-11-22 [1] CRAN (R 3.6.0)                    
##  memoise       1.1.0      2017-04-21 [1] CRAN (R 3.6.0)                    
##  modelr        0.1.6      2020-02-22 [1] CRAN (R 3.6.0)                    
##  munsell       0.5.0      2018-06-12 [1] CRAN (R 3.6.0)                    
##  pillar        1.4.3      2019-12-20 [1] CRAN (R 3.6.0)                    
##  pkgbuild      1.0.6      2019-10-09 [1] CRAN (R 3.6.0)                    
##  pkgconfig     2.0.3      2019-09-22 [1] CRAN (R 3.6.0)                    
##  pkgload       1.0.2      2018-10-29 [1] CRAN (R 3.6.0)                    
##  prettyunits   1.1.1      2020-01-24 [1] CRAN (R 3.6.0)                    
##  processx      3.4.2      2020-02-09 [1] CRAN (R 3.6.0)                    
##  ps            1.3.2      2020-02-13 [1] CRAN (R 3.6.0)                    
##  purrr       * 0.3.4      2020-04-17 [1] CRAN (R 3.6.2)                    
##  R6            2.4.1      2019-11-12 [1] CRAN (R 3.6.0)                    
##  Rcpp          1.0.4.6    2020-04-09 [1] CRAN (R 3.6.3)                    
##  readr       * 1.3.1      2018-12-21 [1] CRAN (R 3.6.0)                    
##  readxl        1.3.1      2019-03-13 [1] CRAN (R 3.6.0)                    
##  remotes       2.1.1      2020-02-15 [1] CRAN (R 3.6.0)                    
##  reprex        0.3.0      2019-05-16 [1] CRAN (R 3.6.0)                    
##  rlang         0.4.5      2020-03-01 [1] CRAN (R 3.6.0)                    
##  rmarkdown     2.1        2020-01-20 [1] CRAN (R 3.6.0)                    
##  rprojroot     1.3-2      2018-01-03 [1] CRAN (R 3.6.0)                    
##  rstudioapi    0.11       2020-02-07 [1] CRAN (R 3.6.0)                    
##  rvest         0.3.5      2019-11-08 [1] CRAN (R 3.6.0)                    
##  scales        1.1.0      2019-11-18 [1] CRAN (R 3.6.0)                    
##  sessioninfo   1.1.1      2018-11-05 [1] CRAN (R 3.6.0)                    
##  snakecase     0.11.0     2019-05-25 [1] CRAN (R 3.6.0)                    
##  stringi       1.4.6      2020-02-17 [1] CRAN (R 3.6.0)                    
##  stringr     * 1.4.0      2019-02-10 [1] CRAN (R 3.6.0)                    
##  testthat      2.3.2      2020-03-02 [1] CRAN (R 3.6.0)                    
##  tibble      * 3.0.1      2020-04-20 [1] CRAN (R 3.6.2)                    
##  tidyr       * 1.0.2      2020-01-24 [1] CRAN (R 3.6.0)                    
##  tidyselect    1.0.0      2020-01-27 [1] CRAN (R 3.6.0)                    
##  tidyverse   * 1.3.0      2019-11-21 [1] CRAN (R 3.6.0)                    
##  usethis       1.5.1      2019-07-04 [1] CRAN (R 3.6.0)                    
##  vctrs         0.2.4      2020-03-10 [1] CRAN (R 3.6.0)                    
##  withr         2.2.0      2020-04-20 [1] CRAN (R 3.6.2)                    
##  xfun          0.13       2020-04-13 [1] CRAN (R 3.6.2)                    
##  xml2          1.3.1      2020-04-09 [1] CRAN (R 3.6.2)                    
##  yaml          2.2.1      2020-02-01 [1] CRAN (R 3.6.0)                    
## 
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library
---
title: "Clean data"
author: "Steven L. Peck and Andrew Heiss"
date: "Last run: `r format(Sys.time(), '%B %e, %Y')`"
output: 
  html_document:
    code_folding: hide
editor_options: 
  chunk_output_type: console
---

# Load and clean data

```{r setup, warning=FALSE, message=FALSE}
library(tidyverse)
library(janitor)
library(here)

set.seed(1234)
```


```{r clean-data, warning=FALSE, message=FALSE, cache=TRUE}
# Constraints -------------------------------------------------------------

constraint_levels <- function(x) {
  filter(constraints, constraint == x) %>% 
    pull(levels_clean) %>%
    .[[1]]
}

constraints <- tribble(
  ~constraint, ~constraint_clean, ~levels_clean,
  "create_network", "Network creation", list("Network creation" = "T", "No network creation" = "F"),
  "select", "Selection", list("Selection" = "T", "No selection" = "F"),
  "disperse", "Dispersal", list("Dispersal" = "T", "No dispersal" = "F"),
  "compete", "Competition", list("Competition" = "T", "No competition" = "F"),
  "selectfor_d", "Selection for D", list("Selection" = "T", "No selection" = "F"),
  "catastrophe", "Catastrophe", list("Catastrophe" = "T", "No catastrophe" = "F")
)

saveRDS(constraints, here("data", "derived_data", "constraints.rds"))


# High/low ----------------------------------------------------------------

BHL <- read_rds(here("data", "raw_data", "BHL.rds")) %>% 
  clean_names(case = "snake") %>%   # Get rid of invalid characters in column names
  rename_all(list(~str_remove_all(., "yn$"))) %>%  # Get rid of "yn" in column names
  # Clean up constraint values
  mutate(create_network = fct_recode(create_network, !!!constraint_levels("create_network")),
         select = fct_recode(select, !!!constraint_levels("select")),
         disperse = fct_recode(disperse, !!!constraint_levels("disperse")),
         compete = fct_recode(compete, !!!constraint_levels("compete")),
         selectfor_d = fct_recode(selectfor_d, !!!constraint_levels("selectfor_d")),
         catastrophe = fct_recode(catastrophe, !!!constraint_levels("catastrophe"))) %>% 
  mutate_at(vars(one_of(constraints$constraint)), list(~ fct_inorder(.))) %>% 
  mutate(repp = factor(repp, levels = c(1, 2), labels = c("High", "Low"), ordered = TRUE))

BHL_small <- BHL %>% 
  group_by(repp) %>% 
  sample_frac(size = 0.1) %>% 
  ungroup()

sim_hl <- BHL_small %>% 
  # Make new columns with "_constraint" suffix that show if constraint is T/F
  # instead of using the label. e.g. "Selection" becomes TRUE, "No selection"
  # becomes FALSE
  mutate_at(vars(one_of(constraints$constraint)),
            list(constraint = ~as.logical(-as.integer(.) + 2L))) %>%
  # Count how many of the constraints are TRUE in each row
  mutate(n_constraints = reduce(select(., ends_with("_constraint")), `+`)) %>%
  # Make a factor version of the constraint count for plotting
  mutate(n_constraints_f = as.factor(n_constraints)) %>% 
  mutate(turnover_diff = landscape_linked_species_mean_turnover - landscape_unlinked_species_mean_turnover)

saveRDS(BHL, here("data", "derived_data", "BHL.rds"))
saveRDS(BHL_small, here("data", "derived_data", "BHL_small.rds"))
saveRDS(sim_hl, here("data", "derived_data", "sim_hl.rds"))


# Latin squares -----------------------------------------------------------

BLS <- read_rds(here("data", "raw_data", "BLS.rds")) %>% 
  clean_names(case = "snake") %>%   # Get rid of invalid characters in column names
  rename_all(list(~str_remove_all(., "yn$"))) %>%  # Get rid of "yn" in column names
  # Clean up constraint values
  mutate(create_network = fct_recode(create_network, !!!constraint_levels("create_network")),
         select = fct_recode(select, !!!constraint_levels("select")),
         disperse = fct_recode(disperse, !!!constraint_levels("disperse")),
         compete = fct_recode(compete, !!!constraint_levels("compete")),
         selectfor_d = fct_recode(selectfor_d, !!!constraint_levels("selectfor_d")),
         catastrophe = fct_recode(catastrophe, !!!constraint_levels("catastrophe"))) %>% 
  mutate_at(vars(one_of(constraints$constraint)), list(~fct_inorder(.)))

BLS_small <- BLS %>% 
  sample_frac(size = 0.1)

sim_ls <- BLS_small %>%
  # Make new columns with "_constraint" suffix that show if constraint is T/F
  # instead of using the label. e.g. "Selection" becomes TRUE, "No selection"
  # becomes FALSE
  mutate_at(vars(one_of(constraints$constraint)),
            list(constraint = ~as.logical(-as.integer(.) + 2L))) %>%
  # Count how many of the constraints are TRUE in each row
  mutate(n_constraints = reduce(select(., ends_with("_constraint")), `+`)) %>%
  # Make a factor version of the constraint count for plotting
  mutate(n_constraints_f = as.factor(n_constraints)) %>% 
  mutate(turnover_diff = landscape_linked_species_mean_turnover - landscape_unlinked_species_mean_turnover)

saveRDS(BLS, here("data", "derived_data", "BLS.rds"))
saveRDS(BLS_small, here("data", "derived_data", "BLS_small.rds"))
saveRDS(sim_ls, here("data", "derived_data", "sim_ls.rds"))
```


Create columns for every combination of constraint in the data (e.g. `select`, `select` & `disperse`, `select` & `disperse` & `create_network`, etc.)

(By the inimitable Vincent Arel-Bundock)

```{r cache=TRUE, warning=FALSE}
make_combinations <- function(df, m = 5) {
  com <- colnames(df)[2:ncol(df)] %>%
    combn(m) %>%
    as_tibble()
  out <- com %>%
    map(~ df[.]) %>%
    map(~ rowSums(.) == ncol(.)) %>%
    setNames(map(com, paste, collapse = " + ")) %>%
    as_tibble()
  return(out)
}

outcomes <- c("landscape_fitness_linked", "avg_evenness_t", "var_fitness", 
              "landscape_richness_mean", "landscape_linked_species_mean_turnover",
              "turnover_diff")

# High/low ----------------------------------------------------------------

# Select just the run number and *_constraint TRUE/FALSE columns
constraint_combinations_hl <- sim_hl %>%
  select(runnum, ends_with("_constraint")) %>%
  # Shrink names by removing "_constraint"
  rename_at(vars(ends_with("constraint")),
            list(~str_replace_all(., "_constraint", "")))

# Find all combinations of variables (m = number of items in combination; m = 2
# means pairs, m = 3 means triplets, etc.)
all_constraint_combos_hl <- map(2:6, ~make_combinations(constraint_combinations_hl, m = .)) %>%
  bind_cols(constraint_combinations_hl, .)

# Select the outcome variables we care about and join the constraint combinations
constraint_combo_outcomes_hl <- sim_hl %>%
  select(runnum, repp, n_constraints, one_of(outcomes)) %>%
  right_join(all_constraint_combos_hl, by = "runnum")

# Don't double count rows. If a row has two constraints like select and
# disperse, it'll also have select + disperse set to TRUE. If that's the case,
# we don't want to include it in just select or just disperse
constraint_combo_outcomes_hl_nested <- constraint_combo_outcomes_hl %>%
  select(-n_constraints) %>%
  # Make long
  gather(constraint_combo, value, -runnum, -repp, -one_of(outcomes)) %>%
  # Count how many constraints there are within each row based on + signs
  mutate(n = str_count(constraint_combo, "\\+") + 1) %>%
  # Only keep rows where the constraint is turned on
  filter(value == TRUE) %>%
  # Nest all the constraint combinations within each row
  group_by(runnum) %>%
  nest()

# Only keep the values where n == max(n) for that row
# This takes ≈2 minutes
constraint_combo_outcomes_hl_filtered <- constraint_combo_outcomes_hl_nested %>%
  mutate(filtered = data %>% map(~filter(., n == max(.$n)))) %>%
  select(-data) %>%
  unnest(filtered)

# This omitted all the rows where n_constraints == 0, so add those back in
no_constraints_hl <- constraint_combo_outcomes_hl %>%
  filter(n_constraints == 0) %>%
  mutate(constraint_combo = "No constraints", n = 0) %>%
  select(runnum, repp, one_of(outcomes), constraint_combo, n)

constraint_combo_outcomes_hl_done <- bind_rows(constraint_combo_outcomes_hl_filtered,
                                               no_constraints_hl) %>%
  select(-value)

saveRDS(constraint_combo_outcomes_hl_done, 
        here("data", "derived_data", "constraint_combo_outcomes_hl.rds"))


# Latin squares -----------------------------------------------------------

# Select just the run number and *_constraint TRUE/FALSE columns
constraint_combinations_ls <- sim_ls %>%
  select(runnum, ends_with("_constraint")) %>% 
  # Shrink names by removing "_constraint"
  rename_at(vars(ends_with("constraint")), 
            list(~str_replace_all(., "_constraint", "")))

# Find all combinations of variables (m = number of items in combination; m = 2
# means pairs, m = 3 means triplets, etc.)
all_constraint_combos_ls <- map(2:6, ~make_combinations(constraint_combinations_ls, m = .)) %>% 
  bind_cols(constraint_combinations_ls, .)

# Select the outcome variables we care about and join the constraint combinations
constraint_combo_outcomes_ls <- sim_ls %>%
  select(runnum, n_constraints, one_of(outcomes)) %>% 
  right_join(all_constraint_combos_ls, by = "runnum")

# Don't double count rows. If a row has two constraints like select and
# disperse, it'll also have select + disperse set to TRUE. If that's the case,
# we don't want to include it in just select or just disperse
constraint_combo_outcomes_ls_nested <- constraint_combo_outcomes_ls %>% 
  select(-n_constraints) %>% 
  # Make long
  gather(constraint_combo, value, -runnum, -one_of(outcomes)) %>% 
  # Count how many constraints there are within each row based on + signs
  mutate(n = str_count(constraint_combo, "\\+") + 1) %>%
  # Only keep rows where the constraint is turned on
  filter(value == TRUE) %>%
  # Nest all the constraint combinations within each row
  group_by(runnum) %>% 
  nest()

# Only keep the values where n == max(n) for that row
# This takes ≈1 minute
constraint_combo_outcomes_ls_filtered <- constraint_combo_outcomes_ls_nested %>% 
  mutate(filtered = data %>% map(~filter(., n == max(.$n)))) %>% 
  select(-data) %>% 
  unnest(filtered)

# This omitted all the rows where n_constraints == 0, so add those back in
no_constraints_ls <- constraint_combo_outcomes_ls %>% 
  filter(n_constraints == 0) %>% 
  mutate(constraint_combo = "No constraints", n = 0) %>% 
  select(runnum, one_of(outcomes), constraint_combo, n)

constraint_combo_outcomes_ls_done <- bind_rows(constraint_combo_outcomes_ls_filtered,
                                               no_constraints_ls) %>% 
  select(-value)

saveRDS(constraint_combo_outcomes_ls_done, 
        here("data", "derived_data", "constraint_combo_outcomes_ls.rds"))
```


Data details:

- High-low full: `r scales::comma(nrow(BHL))` rows
    - High: `r scales::comma(nrow(filter(BHL, repp == "High")))` rows
    - Low: `r scales::comma(nrow(filter(BHL, repp == "Low")))` rows
- High-low small: `r scales::comma(nrow(BHL_small))` rows
    - High: `r scales::comma(nrow(filter(BHL_small, repp == "High")))` rows
    - Low: `r scales::comma(nrow(filter(BHL_small, repp == "Low")))` rows
- Latin squares full: `r scales::comma(nrow(BLS))` rows
- Latin squares small: `r scales::comma(nrow(BLS_small))` rows


\

# Original computing environment

<button data-toggle="collapse" data-target="#sessioninfo" class="btn btn-primary btn-md btn-info">Here's what we used the last time we built this page</button>

<div id="sessioninfo" class="collapse">

```{r show-session-info, echo=TRUE, width=100}
writeLines(readLines(file.path(Sys.getenv("HOME"), ".R/Makevars")))

devtools::session_info()
```

</div> 
