library(tidyverse)
library(ggstance)
library(broom)
library(countrycode)
library(pander)
library(stargazer)
library(skimr)
library(here)
# Ordered factors get polynomial contrasts in linear models by default
# (inspect the current setting with `options("contrasts")`). Use treatment
# contrasts for both unordered and ordered factors for the whole session.
# To change a single variable instead: contrasts(df$x) <- "contr.treatment"
options(contrasts = c("contr.treatment", "contr.treatment"))

# Read the pre-cleaned data sets from output/data/
edb_clean <- read_rds(here("output", "data", "edb_clean.rds"))
edb_reforms <- read_rds(here("output", "data", "edb_reforms.rds"))

# Pull in the project helper functions from lib/
source(here("lib", "model_stuff.R"))
source(here("lib", "graphics_stuff.R"))
Variable descriptions
Description of variables used in analysis
sb_days |
Number of days required to start a business |
World Bank |
sb_cost |
Cost (% of income per capita) of starting a business |
World Bank |
sb_capital |
Paid-in minimum capital (% of income per capita) required to start a business |
World Bank |
sb_proced |
Number of procedures required for an entrepreneur to legally operate a business |
World Bank |
con_proced |
Number of procedures required to resolve a dispute |
World Bank |
con_days |
Number of days required for the process of dispute resolution |
World Bank |
gdp |
GDP (constant 2005 USD) |
World Development Indicators |
gdpcap |
GDP per capita |
World Development Indicators |
gdpgrowth |
GDP growth |
World Development Indicators |
pop_ln |
Log of population |
World Development Indicators |
polity |
Polity IV score |
Polity IV Project |
inttot |
Magnitude of international conflict |
Center for Systemic Peace |
civtot |
Magnitude of civil conflict |
Center for Systemic Peace |
loan_ln |
Log of the sum of all loans from the IBRD since 2005, constant for all years |
|
Variable summaries
The small inline histograms in this table will only display correctly when they use a font that supports block elements, such as DejaVu Sans or Arial.
# Per-variable summary statistics: keep the columns named in
# edb_summary_base$Variable (defined outside this excerpt), stack them into
# long form, drop missing values, and summarise each variable
edb_summary_stats <- edb_clean %>%
  select(one_of(edb_summary_base$Variable)) %>%
  gather(key = Variable, value = value) %>%
  filter(!is.na(value)) %>%
  group_by(Variable) %>%
  summarize(
    N = n(),
    Mean = mean(value),
    `Std. Dev` = sd(value),
    Min = min(value),
    Max = max(value),
    Distribution = inline_hist(value)
  )

# Attach the statistics to the variable list, wrap variable names in
# backticks, and drop the descriptive columns
edb_summary <- edb_summary_base %>%
  left_join(edb_summary_stats, by = "Variable") %>%
  mutate(Variable = paste0("`", Variable, "`")) %>%
  select(-Definition, -Source)

# Render the table as pandoc markdown and print it
caption <- "Summary statistics of variables used in analysis"
tbl_edb_summary <- pandoc.table.return(
  edb_summary,
  caption = caption,
  big.mark = ",",
  split.tables = Inf,
  justify = "lcccccc"
)
cat(tbl_edb_summary)
Summary statistics of variables used in analysis
sb_days |
2,368 |
39.55 |
52.8 |
0.5 |
697 |
▇▁▁▁▁▁▁▁ |
sb_cost |
2,368 |
57.27 |
123.1 |
0 |
1,540 |
▇▁▁▁▁▁▁▁ |
sb_capital |
2,258 |
114.2 |
434.5 |
0 |
7,445 |
▇▁▁▁▁▁▁▁ |
sb_proced |
2,368 |
8.55 |
3.474 |
1 |
20 |
▂▃▇▅▃▂▁▁ |
con_proced |
2,339 |
36.77 |
8.006 |
0 |
62 |
▁▁▁▃▇▇▂▁ |
con_days |
2,338 |
590.8 |
313.3 |
7 |
1,800 |
▂▆▇▃▁▁▁▁ |
gdp |
2,468 |
264,872,882,270 |
1,122,476,616,993 |
97,813,220 |
14,450,329,106,512 |
▇▁▁▁▁▁▁▁ |
gdpcap |
2,480 |
10,591 |
16,385 |
108 |
113,739 |
▇▁▁▁▁▁▁▁ |
gdpgrowth |
2,468 |
2.476 |
5.478 |
-62.47 |
102.8 |
▁▁▂▇▁▁▁▁ |
pop_ln |
2,512 |
15.56 |
2.068 |
9.861 |
21.03 |
▁▂▂▆▇▅▂▁ |
polity |
2,138 |
3.918 |
6.238 |
-10 |
10 |
▁▂▁▂▁▂▃▇ |
inttot |
2,182 |
0.06279 |
0.4875 |
0 |
6 |
▇▁▁▁▁▁▁▁ |
civtot |
2,182 |
0.4216 |
1.279 |
0 |
9 |
▇▁▁▁▁▁▁▁ |
loan_ln |
3,016 |
13.97 |
10.78 |
0 |
26.46 |
▇▁▁▁▁▂▇▃ |
List of countries in initial 2001 report
* indicates country has an EDB reform committee by 2015
# Reform committee data; its `cowcode` column is matched against `ccode` below
edb_bureaus <- read_csv(file.path(here(), "output", "data", "edb_bureaus.csv"))

# One row per country flagged in_2001, sorted alphabetically, with an escaped
# asterisk appended when the country's code appears in edb_bureaus$cowcode
country_names <- edb_clean %>%
  filter(in_2001 == 1) %>%
  group_by(ccode) %>%
  summarize(Country = first(country_name)) %>%
  ungroup() %>%
  mutate(has_committee = ifelse(ccode %in% edb_bureaus$cowcode, "\\*", "")) %>%
  arrange(Country) %>%
  mutate(Country = paste0(Country, has_committee)) %>%
  select(Country)

# Lay the names out column-wise in a four-column table. matrix() recycles its
# input (with a warning) when the length is not a multiple of the column
# count, which would repeat country names at the end of the table. Pad with
# exactly as many NAs as needed rather than assuming two.
caption <- "Countries in 2001 report"
tbl_countries <- pandoc.table.return(
  matrix(c(country_names$Country,
           rep(NA, (-nrow(country_names)) %% 4)),
         ncol = 4),
  caption = caption,
  split.tables = Inf,
  missing = "",
  justify = "llll"
)
cat(tbl_countries)
Countries in 2001 report
Albania |
Ecuador |
Madagascar |
Slovakia |
Algeria* |
Egypt |
Malawi* |
Slovenia |
Argentina |
Ethiopia |
Malaysia* |
South Africa |
Armenia |
Finland |
Mali* |
South Korea* |
Australia |
France |
Mexico* |
Spain |
Austria |
Georgia* |
Moldova* |
Sri Lanka* |
Azerbaijan* |
Germany |
Mongolia |
Sweden |
Bangladesh |
Ghana |
Morocco* |
Switzerland |
Belarus |
Greece |
Mozambique |
Syria |
Belgium |
Guatemala* |
Nepal |
Taiwan |
Benin |
Honduras |
Netherlands |
Tanzania |
Bhutan |
Hong Kong SAR China |
New Zealand |
Thailand |
Bolivia |
Hungary |
Nicaragua |
Tunisia |
Bosnia & Herzegovina |
India |
Niger |
Turkey |
Botswana* |
Indonesia* |
Nigeria* |
Uganda |
Bulgaria |
Iran |
Norway |
Ukraine* |
Burkina Faso |
Ireland |
Pakistan |
United Arab Emirates* |
Cameroon |
Israel |
Panama* |
United Kingdom* |
Canada |
Italy |
Peru* |
United States |
Chile* |
Jamaica |
Philippines* |
Uruguay |
China |
Japan |
Poland* |
Uzbekistan* |
Colombia* |
Jordan |
Portugal |
Venezuela |
Costa Rica* |
Kazakhstan* |
Romania |
Vietnam |
Côte d’Ivoire* |
Kenya* |
Russia* |
Yemen |
Croatia* |
Kyrgyzstan* |
Saudi Arabia* |
Zambia* |
Czechia* |
Latvia |
Senegal |
Zimbabwe |
Denmark |
Lebanon |
Serbia |
|
Dominican Republic* |
Lithuania |
Singapore |
|
How control variables relate to being in the sample and to the outcomes
Model A1
# Outcome for Model A1: within each country, in_2001 shifted one row ahead,
# stored as a two-level factor
edb_in_2001 <- edb_clean %>%
  filter(year > 2000) %>%
  group_by(ccode) %>%
  mutate(
    in_2001_lead = factor(lead(in_2001), levels = 0:1,
                          labels = c("Not in 2001", "In 2001"))
  )

# Logit of the leaded indicator on the control variables
model_in_2001 <- glm(
  in_2001_lead ~ gdpcap_ln + gdpgrowth + polity + pop_ln +
    inttot + civtot + loan_ln,
  data = edb_in_2001,
  family = binomial(link = "logit")
)

# Tidy coefficient table taken from the clustered-SE helper's `coefs` element
model_in_2001_robust <- tidy(
  robust_clusterify(model_in_2001, edb_in_2001, "ccode")$coefs
)

# HTML regression table, with the model's standard errors replaced by the
# robust ones
# NOTE(review): the second note says the sample is limited to countries in
# the 2001 report, but edb_in_2001 is filtered on year only — confirm wording
model_in_2001_out <- stargazer(
  model_in_2001,
  type = "html",
  dep.var.caption = "",
  dep.var.labels = "in\\_2001\\_lead",
  se = list(model_in_2001_robust$std.error),
  notes = c("Logistic regression model. Robust standard errors clustered by country.",
            "Models include countries present in the 2001 EDB report.")
)
|
|
in_2001_lead
|
|
gdpcap_ln
|
0.688**
|
|
(0.268)
|
|
|
gdpgrowth
|
0.012
|
|
(0.024)
|
|
|
polity
|
0.124***
|
|
(0.040)
|
|
|
pop_ln
|
1.635***
|
|
(0.479)
|
|
|
inttot
|
-0.527*
|
|
(0.307)
|
|
|
civtot
|
-0.586**
|
|
(0.251)
|
|
|
loan_ln
|
0.037
|
|
(0.035)
|
|
|
Constant
|
-31.163***
|
|
(9.261)
|
|
|
|
Observations
|
1,921
|
Log Likelihood
|
-678.778
|
Akaike Inf. Crit.
|
1,373.555
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
|
Logistic regression model. Robust standard errors clustered by country.
|
|
Models include countries present in the 2001 EDB report.
|
Models A2–A5
# Regress the lead of an outcome on its current value plus the control
# variables, using OLS.
# Given "sb_days_ln", this fits "sb_days_ln_lead ~ sb_days_ln + ...".
#
# outcome: name of the outcome column, as a string
# df: data frame containing the outcome, its "<outcome>_lead" column, and the
#   control columns named below
# Returns the fitted lm object.
run_leaded_ols_check <- function(outcome, df) {
  predictors <- c(outcome, "gdpcap_ln", "gdpgrowth", "polity", "pop_ln",
                  "inttot", "civtot", "loan_ln")
  rhs <- paste(predictors, collapse = " + ")
  form <- as.formula(paste0(outcome, "_lead ~ ", rhs))
  lm(form, data = df)
}
# Define all the models that need to be run: one row per outcome. The constant
# `grouping` column is a temporary key used only for the join below.
# tibble() replaces the deprecated data_frame().
models_to_run <- tibble(outcome = c("sb_days_ln", "sb_proced",
                                    "sb_cost_ln", "sb_capital_ln"),
                        grouping = 1)

# Rows after 2000 for countries flagged in_2001, with a "<outcome>_lead"
# column for each outcome computed within country. The result is nested into
# one list-column cell and repeated once per outcome via the join.
# NOTE(review): lead() follows row order — presumably edb_clean is already
# sorted by year within ccode; confirm.
edb_checks <- edb_clean %>%
  filter(year > 2000, in_2001 == 1) %>%
  mutate(grouping = 1) %>%
  group_by(ccode) %>%
  # Written out explicitly instead of mutate_at(..., funs(lead = lead(.)))
  # because funs() is deprecated; the resulting column names are the same
  mutate(sb_days_ln_lead = lead(sb_days_ln),
         sb_proced_lead = lead(sb_proced),
         sb_cost_ln_lead = lead(sb_cost_ln),
         sb_capital_ln_lead = lead(sb_capital_ln)) %>%
  group_by(grouping) %>%
  nest() %>%
  right_join(models_to_run, by = "grouping")
# Fit every model inside the data frame and collect the clustered SEs
edb_checks_models <- edb_checks %>%
  mutate(
    # One OLS fit per outcome/data pair
    model = pmap(list(outcome, data), run_leaded_ols_check),
    # Robust standard errors clustered on ccode
    robust_se = pmap(list(model, data, "ccode"), robust_clusterify),
    # Tidied coefficient table built from the robust results
    # NOTE(review): `.$coef` depends on `$` partial matching if the element
    # is actually named `coefs`, as the Model A1 code suggests — confirm
    tidy_robust = map(robust_se, ~ tidy(.$coef)),
    # Just the standard errors, in the shape stargazer's `se` expects
    ses_only = map(tidy_robust, ~ .$std.error)
  )

# HTML regression table for Models A2-A5 using the robust standard errors
edb_checks_models_out <- stargazer(
  edb_checks_models$model,
  type = "html",
  dep.var.caption = "",
  se = edb_checks_models$ses_only,
  notes = c("OLS models. Robust standard errors clustered by country.",
            "Models include countries present in the 2001 EDB report."),
  keep.stat = c("n", "rsq", "adj.rsq")
)
|
|
sb_days_ln_lead
|
sb_proced_lead
|
sb_cost_ln_lead
|
sb_capital_ln_lead
|
|
(1)
|
(2)
|
(3)
|
(4)
|
|
sb_days_ln
|
0.911***
|
|
|
|
|
(0.014)
|
|
|
|
|
|
|
|
|
sb_proced
|
|
0.929***
|
|
|
|
|
(0.015)
|
|
|
|
|
|
|
|
sb_cost_ln
|
|
|
0.941***
|
|
|
|
|
(0.010)
|
|
|
|
|
|
|
sb_capital_ln
|
|
|
|
0.915***
|
|
|
|
|
(0.011)
|
|
|
|
|
|
gdpcap_ln
|
-0.025**
|
-0.045
|
-0.051***
|
-0.042*
|
|
(0.011)
|
(0.044)
|
(0.013)
|
(0.023)
|
|
|
|
|
|
gdpgrowth
|
-0.007***
|
-0.020***
|
-0.014***
|
-0.004
|
|
(0.002)
|
(0.007)
|
(0.002)
|
(0.006)
|
|
|
|
|
|
polity
|
0.0002
|
0.003
|
0.003*
|
0.012***
|
|
(0.001)
|
(0.006)
|
(0.001)
|
(0.004)
|
|
|
|
|
|
pop_ln
|
0.011
|
0.074***
|
0.011*
|
0.007
|
|
(0.007)
|
(0.025)
|
(0.006)
|
(0.014)
|
|
|
|
|
|
inttot
|
-0.006
|
0.039
|
0.011
|
-0.003
|
|
(0.015)
|
(0.062)
|
(0.010)
|
(0.027)
|
|
|
|
|
|
civtot
|
0.006
|
0.021
|
0.003
|
-0.004
|
|
(0.005)
|
(0.030)
|
(0.007)
|
(0.017)
|
|
|
|
|
|
loan_ln
|
-0.001
|
0.002
|
-0.001
|
-0.003
|
|
(0.001)
|
(0.006)
|
(0.001)
|
(0.003)
|
|
|
|
|
|
Constant
|
0.247
|
-0.533
|
0.359**
|
0.232
|
|
(0.168)
|
(0.537)
|
(0.166)
|
(0.319)
|
|
|
|
|
|
|
Observations
|
1,310
|
1,310
|
1,310
|
1,209
|
R2
|
0.887
|
0.912
|
0.957
|
0.895
|
Adjusted R2
|
0.887
|
0.911
|
0.957
|
0.894
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
|
OLS models. Robust standard errors clustered by country.
|
|
Models include countries present in the 2001 EDB report.
|
Starting a Business indicators
Not generated with this script.
Enforcing Contracts indicators
Not generated with this script.
Country fixed effects for OLS models
Table D1: “Ranked” coefficients with and without country fixed effects
# Specification grid: every outcome crossed with every set of controls
country_fe_models <- expand.grid(
  outcome = c("sb_proced", "sb_days_ln", "sb_cost_ln", "sb_capital_ln",
              "con_proced", "con_days"),
  controls = c("Main",
               "Main + `civtot_lag` + `inttot_lag` + `loan_ln_lag`",
               "Main + `civtot_lag` + `inttot_lag` + `loan_bin_lag`",
               "Main + fixed effects",
               "Main - 2001",
               "Main - 2001 & 2002"),
  stringsAsFactors = FALSE
) %>%
  mutate(
    # These three specifications get country fixed effects
    country_fe = controls %in% c("Main + fixed effects", "Main - 2001",
                                 "Main - 2001 & 2002"),
    # Key of the filtered data set each specification is fit on
    df_to_use = case_when(
      controls == "Main - 2001" ~ "Drop 2001",
      controls == "Main - 2001 & 2002" ~ "Drop 2002",
      TRUE ~ "Full"
    )
  )

# The three samples, stored in a list column keyed by df_to_use
data_to_use <- tibble(
  df_to_use = c("Full", "Drop 2001", "Drop 2002"),
  data = list(
    filter(edb_clean, year > 2000, in_2004 == 1),
    filter(edb_clean, year > 2001, in_2004 == 1),
    filter(edb_clean, year > 2002, in_2004 == 1)
  )
)

# Attach the matching sample to every row of the specification grid
country_fe_models_full <- right_join(data_to_use, country_fe_models,
                                     by = "df_to_use") %>%
  select(-df_to_use)
# Fit one OLS model for a given specification.
#
# outcome: name of the dependent variable; "<outcome>_lag" and ranked_lag are
#   always included as predictors, along with the four main controls
# controls: specification label; the conflict and loan controls are added
#   when it contains "loan_ln_lag" or "loan_bin_lag"
# country_fe: if TRUE, as.factor(ccode) is added to the formula
# df: data frame the model is fit on
# Returns the fitted lm object.
run_country_fe_models <- function(outcome, controls, country_fe, df) {
  main_controls <- "gdpcap_ln_lag + gdpgrowth_lag + polity_lag + pop_ln_lag"

  additional_controls <- if (str_detect(controls, "loan_ln_lag")) {
    " + civtot_lag + inttot_lag + loan_ln_lag"
  } else if (str_detect(controls, "loan_bin_lag")) {
    " + civtot_lag + inttot_lag + loan_bin_lag"
  } else {
    ""
  }

  country <- if (country_fe) " + as.factor(ccode)" else ""

  form <- as.formula(paste0(outcome, " ~ ", outcome, "_lag + ranked_lag + ",
                            main_controls, additional_controls, country))
  lm(form, data = df)
}
# Fit every specification in the grid and store the results alongside it
country_fe_models_all <- country_fe_models_full %>%
  mutate(
    # One OLS fit per row
    model = pmap(list(outcome, controls, country_fe, data),
                 run_country_fe_models),
    # Robust standard errors clustered on ccode
    robust_se = pmap(list(model, data, "ccode"), robust_clusterify),
    # One-row model summary from broom
    glance = map(model, glance),
    # Tidied coefficient table built from the robust results
    tidy_robust = map(robust_se, ~ tidy(.$coef)),
    # Just the standard errors, for stargazer's `se` argument
    ses_only = map(tidy_robust, ~ .$std.error)
  )
# One-row footer stating, for each control set, whether the model included
# country fixed effects
fixed_effects_country <- country_fe_models %>%
  select(controls, country_fe) %>%
  distinct() %>%
  mutate(country_fe = ifelse(country_fe, "Yes", "No")) %>%
  spread(key = controls, value = country_fe) %>%
  mutate(Outcome = "Fixed country effects")

# Wide table of the "ranked" coefficients: one row per outcome, one column
# per control set, with the footer row appended
ranked_coefs_country <- country_fe_models_all %>%
  # One row per coefficient per model
  unnest(tidy_robust) %>%
  filter(str_detect(term, "ranked")) %>%
  mutate(
    # Estimate to three decimals followed by significance stars
    value = paste0(sprintf("%.3f", round(estimate, 3)), p_stars(p.value)),
    # Keep outcomes and control sets in their original order; outcome names
    # are wrapped in backticks for display
    outcome = factor(outcome,
                     levels = unique(country_fe_models$outcome),
                     labels = paste0("`", unique(country_fe_models$outcome), "`"),
                     ordered = TRUE),
    controls = factor(controls,
                      levels = unique(country_fe_models$controls),
                      ordered = TRUE)
  ) %>%
  # Keep only what the table needs
  select(Outcome = outcome, controls, value) %>%
  spread(key = controls, value = value) %>%
  bind_rows(fixed_effects_country)
Summary of coefficients for the lagged “Ranked” variable with and without country fixed effects
sb_proced |
-0.233*** |
-0.247*** |
-0.251*** |
-0.237** |
-0.237** |
-0.225** |
sb_days_ln |
-0.029 |
-0.027 |
-0.029 |
-0.061** |
-0.061** |
-0.057** |
sb_cost_ln |
-0.074*** |
-0.078*** |
-0.079*** |
-0.064** |
-0.064** |
-0.046* |
sb_capital_ln |
-0.003 |
0.005 |
0.003 |
-0.032 |
-0.032 |
-0.032 |
con_proced |
-1.339*** |
-1.293*** |
-1.279*** |
-0.045 |
-0.045 |
-0.204*** |
con_days |
-67.590*** |
-67.839*** |
-67.357*** |
-9.040* |
-9.040* |
-14.108** |
Fixed country effects |
No |
No |
No |
Yes |
Yes |
Yes |
Note: Main controls are gdpcap_ln_lag
, gdpgrowth_lag
, polity_lag
, and pop_ln_lag
Table D2: Sub-indicators for “Starting a Business” with and without country fixed effects
# "Starting a Business" models for Table D2: the loan_bin specification and
# the fixed-effects specification for each outcome, in first-appearance order
sb_fe <- country_fe_models_all %>%
  filter(
    str_detect(outcome, "sb_"),
    controls %in% c("Main + fixed effects",
                    "Main + `civtot_lag` + `inttot_lag` + `loan_bin_lag`")
  ) %>%
  mutate(outcome = fct_inorder(outcome, ordered = TRUE)) %>%
  arrange(outcome)

# HTML regression table with robust standard errors; coefficients whose names
# match "ccode" are omitted and summarised by the "Country fixed effects" row
sb_country_fe_models_out <- stargazer(
  sb_fe$model,
  type = "html",
  dep.var.caption = "",
  se = sb_fe$ses_only,
  notes = c("OLS models. Robust standard errors clustered by country.",
            "Models include countries present in the 2004 EDB report."),
  keep.stat = c("n", "rsq", "adj.rsq"),
  add.lines = list(c("Country fixed effects", rep(c("No", "Yes"), 4))),
  omit = "ccode"
)
|
|
sb_proced
|
sb_days_ln
|
sb_cost_ln
|
sb_capital_ln
|
|
(1)
|
(2)
|
(3)
|
(4)
|
(5)
|
(6)
|
(7)
|
(8)
|
|
sb_proced_lag
|
0.920***
|
0.741***
|
|
|
|
|
|
|
|
(0.013)
|
(0.026)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sb_days_ln_lag
|
|
|
0.931***
|
0.751***
|
|
|
|
|
|
|
|
(0.011)
|
(0.022)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sb_cost_ln_lag
|
|
|
|
|
0.948***
|
0.695***
|
|
|
|
|
|
|
|
(0.009)
|
(0.041)
|
|
|
|
|
|
|
|
|
|
|
|
sb_capital_ln_lag
|
|
|
|
|
|
|
0.924***
|
0.742***
|
|
|
|
|
|
|
|
(0.009)
|
(0.025)
|
|
|
|
|
|
|
|
|
|
ranked_lag
|
-0.251***
|
-0.237**
|
-0.029
|
-0.061**
|
-0.079***
|
-0.064**
|
0.003
|
-0.032
|
|
(0.069)
|
(0.096)
|
(0.019)
|
(0.029)
|
(0.018)
|
(0.029)
|
(0.045)
|
(0.063)
|
|
|
|
|
|
|
|
|
|
gdpcap_ln_lag
|
-0.017
|
-0.420***
|
-0.015
|
-0.125***
|
-0.027**
|
-0.232***
|
-0.030*
|
-0.224**
|
|
(0.037)
|
(0.152)
|
(0.009)
|
(0.043)
|
(0.012)
|
(0.044)
|
(0.018)
|
(0.106)
|
|
|
|
|
|
|
|
|
|
gdpgrowth_lag
|
-0.021***
|
-0.008
|
-0.007***
|
-0.005***
|
-0.013***
|
-0.007***
|
-0.008
|
0.002
|
|
(0.006)
|
(0.007)
|
(0.002)
|
(0.002)
|
(0.002)
|
(0.002)
|
(0.005)
|
(0.006)
|
|
|
|
|
|
|
|
|
|
polity_lag
|
-0.005
|
-0.020
|
-0.002
|
-0.005
|
0.0004
|
-0.011*
|
0.004
|
0.018
|
|
(0.006)
|
(0.018)
|
(0.001)
|
(0.005)
|
(0.001)
|
(0.006)
|
(0.004)
|
(0.019)
|
|
|
|
|
|
|
|
|
|
pop_ln_lag
|
0.041*
|
-1.244**
|
0.006
|
-0.172
|
0.008
|
-0.416***
|
-0.009
|
-0.294
|
|
(0.024)
|
(0.540)
|
(0.006)
|
(0.116)
|
(0.005)
|
(0.158)
|
(0.012)
|
(0.688)
|
|
|
|
|
|
|
|
|
|
civtot_lag
|
0.028
|
|
0.004
|
|
0.004
|
|
0.014
|
|
|
(0.026)
|
|
(0.005)
|
|
(0.006)
|
|
(0.014)
|
|
|
|
|
|
|
|
|
|
|
inttot_lag
|
0.076*
|
|
-0.004
|
|
0.031**
|
|
-0.087
|
|
|
(0.041)
|
|
(0.016)
|
|
(0.012)
|
|
(0.118)
|
|
|
|
|
|
|
|
|
|
|
loan_bin_lag
|
0.103
|
|
-0.006
|
|
0.036
|
|
-0.063
|
|
|
(0.119)
|
|
(0.029)
|
|
(0.032)
|
|
(0.058)
|
|
|
|
|
|
|
|
|
|
|
Constant
|
0.038
|
30.747***
|
0.204
|
5.258**
|
0.213
|
10.979***
|
0.432*
|
7.993
|
|
(0.457)
|
(10.189)
|
(0.137)
|
(2.340)
|
(0.159)
|
(3.055)
|
(0.258)
|
(12.806)
|
|
|
|
|
|
|
|
|
|
|
Country fixed effects
|
No
|
Yes
|
No
|
Yes
|
No
|
Yes
|
No
|
Yes
|
Observations
|
1,659
|
1,660
|
1,659
|
1,660
|
1,659
|
1,660
|
1,558
|
1,559
|
R2
|
0.909
|
0.926
|
0.902
|
0.919
|
0.963
|
0.970
|
0.904
|
0.918
|
Adjusted R2
|
0.909
|
0.919
|
0.902
|
0.911
|
0.963
|
0.968
|
0.903
|
0.910
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
|
OLS models. Robust standard errors clustered by country.
|
|
Models include countries present in the 2004 EDB report.
|
Table D3: Sub-indicators for “Enforcing Contracts” with and without country fixed effects
# "Enforcing Contracts" models for Table D3: the loan_bin specification and
# the fixed-effects specification for each outcome, in first-appearance order
con_fe <- country_fe_models_all %>%
  filter(str_detect(outcome, "con_"),
         controls %in% c("Main + fixed effects",
                         "Main + `civtot_lag` + `inttot_lag` + `loan_bin_lag`")) %>%
  mutate(outcome = fct_inorder(outcome, ordered = TRUE)) %>%
  arrange(outcome)

# HTML regression table with robust standard errors; coefficients whose names
# match "ccode" are omitted. The "Country fixed effects" row is derived from
# each model's own country_fe flag, so it has one label per model. The
# previous hard-coded rep(c("No", "Yes"), 4) supplied eight labels for this
# four-model table.
con_country_fe_models_out <- stargazer(
  con_fe$model,
  type = "html",
  dep.var.caption = "",
  se = con_fe$ses_only,
  notes = c("OLS models. Robust standard errors clustered by country.",
            "Models include countries present in the 2004 EDB report."),
  keep.stat = c("n", "rsq", "adj.rsq"),
  add.lines = list(c("Country fixed effects",
                     ifelse(con_fe$country_fe, "Yes", "No"))),
  omit = "ccode"
)
|
|
con_proced
|
con_days
|
|
(1)
|
(2)
|
(3)
|
(4)
|
|
con_proced_lag
|
0.708***
|
0.379***
|
|
|
|
(0.023)
|
(0.027)
|
|
|
|
|
|
|
|
con_days_lag
|
|
|
0.903***
|
0.451***
|
|
|
|
(0.015)
|
(0.035)
|
|
|
|
|
|
ranked_lag
|
-1.279***
|
-0.045
|
-67.357***
|
-9.040*
|
|
(0.258)
|
(0.122)
|
(5.427)
|
(4.990)
|
|
|
|
|
|
gdpcap_ln_lag
|
-0.383***
|
0.200
|
-5.263
|
3.433
|
|
(0.145)
|
(0.269)
|
(3.237)
|
(7.930)
|
|
|
|
|
|
gdpgrowth_lag
|
-0.038
|
0.011
|
-2.627***
|
-0.920
|
|
(0.024)
|
(0.015)
|
(0.676)
|
(0.589)
|
|
|
|
|
|
polity_lag
|
-0.112***
|
0.044
|
1.219**
|
-0.785
|
|
(0.039)
|
(0.048)
|
(0.525)
|
(1.765)
|
|
|
|
|
|
pop_ln_lag
|
0.023
|
-0.361
|
2.358
|
-48.999
|
|
(0.109)
|
(1.008)
|
(2.561)
|
(33.737)
|
|
|
|
|
|
civtot_lag
|
0.208**
|
|
9.935**
|
|
|
(0.101)
|
|
(4.295)
|
|
|
|
|
|
|
inttot_lag
|
0.292
|
|
-2.168
|
|
|
(0.393)
|
|
(3.915)
|
|
|
|
|
|
|
loan_bin_lag
|
-0.371
|
|
3.842
|
|
|
(0.487)
|
|
(11.148)
|
|
|
|
|
|
|
Constant
|
15.702***
|
24.570
|
124.001***
|
1,127.757*
|
|
(2.421)
|
(18.769)
|
(42.007)
|
(645.682)
|
|
|
|
|
|
|
Country fixed effects
|
No
|
Yes
|
No
|
Yes
|
Observations
|
1,634
|
1,635
|
1,633
|
1,634
|
R2
|
0.725
|
0.830
|
0.868
|
0.924
|
Adjusted R2
|
0.723
|
0.814
|
0.867
|
0.917
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
|
OLS models. Robust standard errors clustered by country.
|
|
Models include countries present in the 2004 EDB report.
|
Year fixed effects for OLS models
Table E1: “Ranked” coefficients with and without year fixed effects
# Specification grid: every outcome crossed with every set of controls
year_fe_models <- expand.grid(
  outcome = c("sb_proced", "sb_days_ln", "sb_cost_ln", "sb_capital_ln",
              "con_proced", "con_days"),
  controls = c("Main",
               "Main + `civtot_lag` + `inttot_lag` + `loan_ln_lag`",
               "Main + `civtot_lag` + `inttot_lag` + `loan_bin_lag`",
               "Main + fixed effects",
               "Main - 2001",
               "Main - 2001 & 2002"),
  stringsAsFactors = FALSE
) %>%
  mutate(
    # These three specifications get year fixed effects
    year_fe = controls %in% c("Main + fixed effects", "Main - 2001",
                              "Main - 2001 & 2002"),
    # Key of the filtered data set each specification is fit on
    df_to_use = case_when(
      controls == "Main - 2001" ~ "Drop 2001",
      controls == "Main - 2001 & 2002" ~ "Drop 2002",
      TRUE ~ "Full"
    )
  )

# The three samples, stored in a list column keyed by df_to_use
data_to_use <- tibble(
  df_to_use = c("Full", "Drop 2001", "Drop 2002"),
  data = list(
    filter(edb_clean, year > 2000, in_2004 == 1),
    filter(edb_clean, year > 2001, in_2004 == 1),
    filter(edb_clean, year > 2002, in_2004 == 1)
  )
)

# Attach the matching sample to every row of the specification grid
year_fe_models_full <- right_join(data_to_use, year_fe_models,
                                  by = "df_to_use") %>%
  select(-df_to_use)
# Fit one OLS model for a given specification.
#
# outcome: name of the dependent variable; "<outcome>_lag" and ranked_lag are
#   always included as predictors, along with the four main controls
# controls: specification label; the conflict and loan controls are added
#   when it contains "loan_ln_lag" or "loan_bin_lag"
# year_fe: if TRUE, as.factor(year) is added to the formula
# df: data frame the model is fit on
# Returns the fitted lm object.
run_year_fe_models <- function(outcome, controls, year_fe, df) {
  main_controls <- "gdpcap_ln_lag + gdpgrowth_lag + polity_lag + pop_ln_lag"

  additional_controls <- if (str_detect(controls, "loan_ln_lag")) {
    " + civtot_lag + inttot_lag + loan_ln_lag"
  } else if (str_detect(controls, "loan_bin_lag")) {
    " + civtot_lag + inttot_lag + loan_bin_lag"
  } else {
    ""
  }

  year <- if (year_fe) " + as.factor(year)" else ""

  form <- as.formula(paste0(outcome, " ~ ", outcome, "_lag + ranked_lag + ",
                            main_controls, additional_controls, year))
  lm(form, data = df)
}
# Fit every specification in the grid and store the results alongside it
year_fe_models_all <- year_fe_models_full %>%
  mutate(
    # One OLS fit per row
    model = pmap(list(outcome, controls, year_fe, data), run_year_fe_models),
    # Robust standard errors clustered on ccode
    robust_se = pmap(list(model, data, "ccode"), robust_clusterify),
    # One-row model summary from broom
    glance = map(model, glance),
    # Tidied coefficient table built from the robust results
    tidy_robust = map(robust_se, ~ tidy(.$coef)),
    # Just the standard errors, for stargazer's `se` argument
    ses_only = map(tidy_robust, ~ .$std.error)
  )
# One-row footer stating, for each control set, whether the model included
# year fixed effects
fixed_effects_year <- year_fe_models %>%
  select(controls, year_fe) %>%
  distinct() %>%
  mutate(year_fe = ifelse(year_fe, "Yes", "No")) %>%
  spread(key = controls, value = year_fe) %>%
  mutate(Outcome = "Fixed year effects")

# Wide table of the "ranked" coefficients: one row per outcome, one column
# per control set, with the footer row appended
ranked_coefs_year <- year_fe_models_all %>%
  # One row per coefficient per model
  unnest(tidy_robust) %>%
  filter(str_detect(term, "ranked")) %>%
  mutate(
    # Estimate to three decimals followed by significance stars
    value = paste0(sprintf("%.3f", round(estimate, 3)), p_stars(p.value)),
    # Keep outcomes and control sets in their original order; outcome names
    # are wrapped in backticks for display
    outcome = factor(outcome,
                     levels = unique(year_fe_models$outcome),
                     labels = paste0("`", unique(year_fe_models$outcome), "`"),
                     ordered = TRUE),
    controls = factor(controls,
                      levels = unique(year_fe_models$controls),
                      ordered = TRUE)
  ) %>%
  # Keep only what the table needs
  select(Outcome = outcome, controls, value) %>%
  spread(key = controls, value = value) %>%
  bind_rows(fixed_effects_year)
Summary of coefficients for the lagged “Ranked” variable with and without year fixed effects
sb_proced |
-0.233*** |
-0.247*** |
-0.251*** |
0.102 |
0.102 |
0.109 |
sb_days_ln |
-0.029 |
-0.027 |
-0.029 |
0.053 |
0.053 |
0.058 |
sb_cost_ln |
-0.074*** |
-0.078*** |
-0.079*** |
0.033 |
0.033 |
0.034 |
sb_capital_ln |
-0.003 |
0.005 |
0.003 |
0.320 |
0.320 |
0.320 |
con_proced |
-1.339*** |
-1.293*** |
-1.279*** |
0.348 |
0.348 |
0.395 |
con_days |
-67.590*** |
-67.839*** |
-67.357*** |
-1.605 |
-1.605 |
-1.585 |
Fixed year effects |
No |
No |
No |
Yes |
Yes |
Yes |
Note: Main controls are gdpcap_ln_lag
, gdpgrowth_lag
, polity_lag
, and pop_ln_lag
Table E2: Sub-indicators for “Starting a Business” with and without year fixed effects
# "Starting a Business" models for Table E2: the loan_bin specification and
# the fixed-effects specification for each outcome, in first-appearance order
sb_fe_year <- year_fe_models_all %>%
  filter(
    str_detect(outcome, "sb_"),
    controls %in% c("Main + fixed effects",
                    "Main + `civtot_lag` + `inttot_lag` + `loan_bin_lag`")
  ) %>%
  mutate(outcome = fct_inorder(outcome, ordered = TRUE)) %>%
  arrange(outcome)

# HTML regression table with robust standard errors; coefficients whose names
# match "year" are omitted and summarised by the "Year fixed effects" row
sb_year_fe_models_out <- stargazer(
  sb_fe_year$model,
  type = "html",
  dep.var.caption = "",
  se = sb_fe_year$ses_only,
  notes = c("OLS models. Robust standard errors clustered by country.",
            "Models include countries present in the 2004 EDB report."),
  keep.stat = c("n", "rsq", "adj.rsq"),
  add.lines = list(c("Year fixed effects", rep(c("No", "Yes"), 4))),
  omit = "year"
)
|
|
sb_proced
|
sb_days_ln
|
sb_cost_ln
|
sb_capital_ln
|
|
(1)
|
(2)
|
(3)
|
(4)
|
(5)
|
(6)
|
(7)
|
(8)
|
|
sb_proced_lag
|
0.920***
|
0.920***
|
|
|
|
|
|
|
|
(0.013)
|
(0.015)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sb_days_ln_lag
|
|
|
0.931***
|
0.932***
|
|
|
|
|
|
|
|
(0.011)
|
(0.012)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sb_cost_ln_lag
|
|
|
|
|
0.948***
|
0.950***
|
|
|
|
|
|
|
|
(0.009)
|
(0.009)
|
|
|
|
|
|
|
|
|
|
|
|
sb_capital_ln_lag
|
|
|
|
|
|
|
0.924***
|
0.926***
|
|
|
|
|
|
|
|
(0.009)
|
(0.009)
|
|
|
|
|
|
|
|
|
|
ranked_lag
|
-0.251***
|
0.102
|
-0.029
|
0.053
|
-0.079***
|
0.033
|
0.003
|
0.320
|
|
(0.069)
|
(0.335)
|
(0.019)
|
(0.083)
|
(0.018)
|
(0.052)
|
(0.045)
|
(0.281)
|
|
|
|
|
|
|
|
|
|
gdpcap_ln_lag
|
-0.017
|
-0.038*
|
-0.015
|
-0.014**
|
-0.027**
|
-0.032***
|
-0.030*
|
-0.017
|
|
(0.037)
|
(0.022)
|
(0.009)
|
(0.006)
|
(0.012)
|
(0.009)
|
(0.018)
|
(0.012)
|
|
|
|
|
|
|
|
|
|
gdpgrowth_lag
|
-0.021***
|
-0.020***
|
-0.007***
|
-0.007***
|
-0.013***
|
-0.011***
|
-0.008
|
-0.009*
|
|
(0.006)
|
(0.006)
|
(0.002)
|
(0.002)
|
(0.002)
|
(0.002)
|
(0.005)
|
(0.005)
|
|
|
|
|
|
|
|
|
|
polity_lag
|
-0.005
|
-0.006
|
-0.002
|
-0.002*
|
0.0004
|
0.0001
|
0.004
|
0.004
|
|
(0.006)
|
(0.006)
|
(0.001)
|
(0.001)
|
(0.001)
|
(0.001)
|
(0.004)
|
(0.004)
|
|
|
|
|
|
|
|
|
|
pop_ln_lag
|
0.041*
|
0.058**
|
0.006
|
0.008*
|
0.008
|
0.011**
|
-0.009
|
-0.003
|
|
(0.024)
|
(0.023)
|
(0.006)
|
(0.005)
|
(0.005)
|
(0.005)
|
(0.012)
|
(0.011)
|
|
|
|
|
|
|
|
|
|
civtot_lag
|
0.028
|
|
0.004
|
|
0.004
|
|
0.014
|
|
|
(0.026)
|
|
(0.005)
|
|
(0.006)
|
|
(0.014)
|
|
|
|
|
|
|
|
|
|
|
inttot_lag
|
0.076*
|
|
-0.004
|
|
0.031**
|
|
-0.087
|
|
|
(0.041)
|
|
(0.016)
|
|
(0.012)
|
|
(0.118)
|
|
|
|
|
|
|
|
|
|
|
loan_bin_lag
|
0.103
|
|
-0.006
|
|
0.036
|
|
-0.063
|
|
|
(0.119)
|
|
(0.029)
|
|
(0.032)
|
|
(0.058)
|
|
|
|
|
|
|
|
|
|
|
Constant
|
0.038
|
-0.095
|
0.204
|
0.112
|
0.213
|
0.257*
|
0.432*
|
-0.004
|
|
(0.457)
|
(0.426)
|
(0.137)
|
(0.121)
|
(0.159)
|
(0.140)
|
(0.258)
|
(0.238)
|
|
|
|
|
|
|
|
|
|
|
Year fixed effects
|
No
|
Yes
|
No
|
Yes
|
No
|
Yes
|
No
|
Yes
|
Observations
|
1,659
|
1,660
|
1,659
|
1,660
|
1,659
|
1,660
|
1,558
|
1,559
|
R2
|
0.909
|
0.910
|
0.902
|
0.903
|
0.963
|
0.963
|
0.904
|
0.906
|
Adjusted R2
|
0.909
|
0.909
|
0.902
|
0.902
|
0.963
|
0.963
|
0.903
|
0.905
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
|
OLS models. Robust standard errors clustered by country.
|
|
Models include countries present in the 2004 EDB report.
|
Table E3: Sub-indicators for “Enforcing Contracts” with and without year fixed effects
# "Enforcing Contracts" models for Table E3: the loan_bin specification and
# the fixed-effects specification for each outcome, in first-appearance order
con_fe_year <- year_fe_models_all %>%
  filter(str_detect(outcome, "con_"),
         controls %in% c("Main + fixed effects",
                         "Main + `civtot_lag` + `inttot_lag` + `loan_bin_lag`")) %>%
  mutate(outcome = fct_inorder(outcome, ordered = TRUE)) %>%
  arrange(outcome)

# HTML regression table with robust standard errors; coefficients whose names
# match "year" are omitted. The "Year fixed effects" row is derived from each
# model's own year_fe flag, so it has one label per model. The previous
# hard-coded rep(c("No", "Yes"), 4) supplied eight labels for this four-model
# table.
con_year_fe_models_out <- stargazer(
  con_fe_year$model,
  type = "html",
  dep.var.caption = "",
  se = con_fe_year$ses_only,
  notes = c("OLS models. Robust standard errors clustered by country.",
            "Models include countries present in the 2004 EDB report."),
  keep.stat = c("n", "rsq", "adj.rsq"),
  add.lines = list(c("Year fixed effects",
                     ifelse(con_fe_year$year_fe, "Yes", "No"))),
  omit = "year"
)
|
|
con_proced
|
con_days
|
|
(1)
|
(2)
|
(3)
|
(4)
|
|
con_proced_lag
|
0.708***
|
0.780***
|
|
|
|
(0.023)
|
(0.032)
|
|
|
|
|
|
|
|
con_days_lag
|
|
|
0.903***
|
0.971***
|
|
|
|
(0.015)
|
(0.008)
|
|
|
|
|
|
ranked_lag
|
-1.279***
|
0.348
|
-67.357***
|
-1.605
|
|
(0.258)
|
(0.506)
|
(5.427)
|
(3.386)
|
|
|
|
|
|
gdpcap_ln_lag
|
-0.383***
|
-0.167
|
-5.263
|
-3.550**
|
|
(0.145)
|
(0.104)
|
(3.237)
|
(1.559)
|
|
|
|
|
|
gdpgrowth_lag
|
-0.038
|
-0.016
|
-2.627***
|
-1.292***
|
|
(0.024)
|
(0.025)
|
(0.676)
|
(0.463)
|
|
|
|
|
|
polity_lag
|
-0.112***
|
-0.083**
|
1.219**
|
0.573
|
|
(0.039)
|
(0.032)
|
(0.525)
|
(0.376)
|
|
|
|
|
|
pop_ln_lag
|
0.023
|
0.124
|
2.358
|
3.673**
|
|
(0.109)
|
(0.087)
|
(2.561)
|
(1.839)
|
|
|
|
|
|
civtot_lag
|
0.208**
|
|
9.935**
|
|
|
(0.101)
|
|
(4.295)
|
|
|
|
|
|
|
inttot_lag
|
0.292
|
|
-2.168
|
|
|
(0.393)
|
|
(3.915)
|
|
|
|
|
|
|
loan_bin_lag
|
-0.371
|
|
3.842
|
|
|
(0.487)
|
|
(11.148)
|
|
|
|
|
|
|
Constant
|
15.702***
|
4.800***
|
124.001***
|
17.250
|
|
(2.421)
|
(1.757)
|
(42.007)
|
(35.766)
|
|
|
|
|
|
|
Year fixed effects
|
No
|
Yes
|
No
|
Yes
|
Observations
|
1,634
|
1,635
|
1,633
|
1,634
|
R2
|
0.725
|
0.836
|
0.868
|
0.923
|
Adjusted R2
|
0.723
|
0.834
|
0.867
|
0.922
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
|
OLS models. Robust standard errors clustered by country.
|
|
Models include countries present in the 2004 EDB report.
|
India experiment survey text
Not generated with this script.
Investor experiment survey text
Not generated with this script.
