Playground

Code

library(tidyverse)
library(scales)
library(scico)
library(fixest)
library(rdrobust)
library(rdmulti)
library(parameters)

df_2021 <- readRDS(here::here("data/clean_2021.rds")) |>
  mutate(
    revenue_c = revenue - 200000,
    assets_c = assets - 500000,
    eligible = revenue < 200000 & assets < 500000
  )

clrs <- scico::scico(7, palette = "batlow", categorical = TRUE)

Discontinuities in running variables

Two running variables

For a nonprofit to file a 990-EZ, it has to meet two conditions:

Have gross receipts (revenue) less than $200,000 annually
Have less than $500,000 in assets

This creates a situation where we have two running variables, or a bivariate score (Cattaneo et al. 2021, 2016; Matsudaira 2008) or multiple rating score (Reardon and Robinson 2012).

L-shaped zone

For causal inference purposes, we look at comparable nonprofts within a narrow bandwidth around the cutoff, but we have to look at both cutoffs simultaneously, creating this L-shape—organizations below and to the left of the two boundaries are treated, while organizations above and to the right are untreated.

Code

df_2021 |>
  arrange(ez) |> # plot the false points first so they don't cover up the trues
  ggplot(aes(x = revenue, y = assets)) +
  geom_point(aes(color = form), alpha = 0.15, size = 0.005) +
  annotate(
    geom = "segment",
    x = 0,
    xend = 200000,
    y = 500000,
    color = clrs[7],
    linewidth = 7,
    alpha = 0.75
  ) +
  annotate(
    geom = "segment",
    x = 0,
    xend = 200000,
    y = 500000,
    color = "white",
    linewidth = 0.5,
    alpha = 1
  ) +
  annotate(
    geom = "segment",
    x = 200000,
    y = 0,
    yend = 500000,
    color = clrs[7],
    linewidth = 7,
    alpha = 0.75
  ) +
  annotate(
    geom = "segment",
    x = 200000,
    y = 0,
    yend = 500000,
    color = "white",
    linewidth = 0.5,
    alpha = 1
  ) +
  # geom_label(
  #   data = focus_areas,
  #   aes(label = label),
  #   label.r = unit(0.6, "lines"),
  #   family = "Inter",
  #   fontface = "bold"
  # ) +
  scale_x_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  scale_color_manual(
    values = c(clrs[5], clrs[6]),
    guide = guide_legend(override.aes = list(size = 2.5, alpha = 1))
  ) +
  labs(
    x = "Total revenue",
    y = "Total assets",
    color = "IRS form"
  ) +
  coord_cartesian(xlim = c(0, 300000), ylim = c(0, 750000)) +
  theme_minimal(base_family = "Inter") +
  theme(
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 1),
    legend.position = "bottom",
    legend.justification = "left",
    legend.title.position = "left",
    legend.margin = margin(l = 0, t = 0)
  )

Fuzzy compliance!

Further complicating things, there is substantial imperfect compliance. Not every eligible nonprofit files a 990-EZ, and some that shouldn’t qualify file one anyway. For instance, among organizations that were at most $100,000 below the asset threshold, only 43% filed a 990-EZ. 57% could have but didn’t.

Code

df_2021 |> 
  filter(assets > 400000 & assets < 500000) |> 
  count(ez, low_revenue = revenue < 200000) |> 
  group_by(low_revenue) |> 
  mutate(prop = n / sum(n))
## # A tibble: 4 × 4
## # Groups:   low_revenue [2]
##   ez    low_revenue     n     prop
##   <lgl> <lgl>       <int>    <dbl>
## 1 FALSE FALSE        5232 0.999   
## 2 FALSE TRUE         1764 0.550   
## 3 TRUE  FALSE           3 0.000573
## 4 TRUE  TRUE         1442 0.450

We can visualize that fuzzy noncompliance:

Code

bucket_size <- 25000

heatmap_data <- df_2021 |>
  mutate(
    revenue_bucket = floor(revenue / bucket_size) * bucket_size,
    assets_bucket = floor(assets / bucket_size) * bucket_size
  ) |>
  group_by(revenue_bucket, assets_bucket) |>
  summarize(proportion_treated = mean(ez == TRUE)) |>
  # geom_tile plots from the center of each bucket, so shift these over a bit
  mutate(
    revenue_bucket = revenue_bucket + (bucket_size / 2),
    assets_bucket = assets_bucket + (bucket_size / 2)
  )

ggplot(
  heatmap_data,
  aes(x = revenue_bucket, y = assets_bucket, fill = proportion_treated)
) +
  geom_tile() +
  geom_vline(xintercept = 200000, color = clrs[7]) +
  geom_hline(yintercept = 500000, color = clrs[7]) +
  scale_fill_scico(
    palette = "batlow",
    labels = label_percent(),
    guide = guide_colorbar(barwidth = 15, barheight = 0.5)
  ) +
  scale_x_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  labs(
    x = "Total revenue",
    y = "Total assets",
    fill = "% filed 990-EZ"
  ) +
  coord_cartesian(xlim = c(0, 300000), ylim = c(0, 750000)) +
  theme_minimal(base_family = "Inter") +
  theme(
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 1),
    legend.position = "bottom",
    legend.justification = "left",
    legend.title.position = "top",
    legend.margin = margin(l = 0, t = 0)
  )

And with a 3D plot because why not (and because Matsudaira (2008) did).

Code

library(plotly)

tiny_bucket_size <- 10000

plot_heatmap_data <- df_2021 |>
  filter(revenue < 300000, assets < 750000) |> 
  mutate(
    revenue_bucket = floor(revenue / tiny_bucket_size) * tiny_bucket_size,
    assets_bucket = floor(assets / tiny_bucket_size) * tiny_bucket_size
  ) |>
  group_by(revenue_bucket, assets_bucket) |>
  summarize(proportion_treated = mean(ez == TRUE))

plot_ly(
  data = plot_heatmap_data,
  x = ~revenue_bucket, 
  y = ~assets_bucket, 
  z = ~proportion_treated,
  type = "mesh3d",
  intensity = ~proportion_treated,
  colors = scico::scico(100, palette = "batlow"),
  showscale = TRUE,
  colorbar = list(title = "% filed 990-EZ")
) |>
  layout(
    scene = list(
      xaxis = list(title = "Total revenue"),
      yaxis = list(title = "Total assets"),
      zaxis = list(title = "% filed 990-EZ")
    )
  )

Discontinuities in outcome variables

So, we have to deal with two fuzzy discontinuities simultaneously… somehow… Reardon and Robinson (2012) have 5 suggested approaches dealing with two discontinuities, and each have fuzzy analogues. The {rdmulti} package also supports multiple fuzzy discontinuities for nonparametric estimation.

We try three different approaches from Reardon and Robinson (2012) here: frontier RD, response surface RD, and binding score RD.

Frontier RD

With frontier RD, we look at the cutoffs—or each of the lines of the L—in isolation.

For instance, we can look at organizations around the revenue threshold, but with assets below the asset threshold. This is the vertical line of the L—there’s a lot of variation in asset levels, but everyone has $200,000 ± some amount in revenue.

Here’s the OLS version:

Code

low_assets_only <- df_2021 |>
  filter(assets > 300000 & assets < 500000) |>
  filter(revenue > 175000 & revenue < 225000) |>
  filter(
    profit_margin_ratio >= quantile(profit_margin_ratio, 0.025) &
      profit_margin_ratio <= quantile(profit_margin_ratio, 0.975)
  )

frontier_revenue_ols <- feols(
  profit_margin_ratio ~ revenue_c + I(revenue_c^2) | 0 | ez ~ eligible,
  data = low_assets_only,
  vcov = "HC1"
)
summary(frontier_revenue_ols)
## TSLS estimation - Dep. Var.: profit_margin_ratio
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: profit_margin_ratio
## Observations: 1,478
## Standard-errors: Heteroskedasticity-robust 
##                 Estimate Std. Error t value   Pr(>|t|)    
## (Intercept)    8.027e-02  1.776e-02 4.52091 6.6502e-06 ***
## fit_ezTRUE     8.157e-02  8.095e-02 1.00769 3.1377e-01    
## revenue_c      1.009e-06  9.733e-07 1.03633 3.0022e-01    
## I(revenue_c^2) 2.260e-12  3.830e-11 0.05887 9.5306e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.267148   Adj. R2: 0.02159
## F-test (1st stage), ezTRUE: stat = 91.6     , p < 2.2e-16 , on 1 and 1,474 DoF.
##                 Wu-Hausman: stat =  0.326124, p = 0.568038, on 1 and 1,473 DoF.

model_parameters(frontier_revenue_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(1474) |     p
## -----------------------------------------------------------------
## fit ezTRUE |        0.08 | 0.08 | [-0.08, 0.24] |    1.01 | 0.314

And the rdrobust() version:

Code

rd_frontier_revenue <- rdrobust(
  y = low_assets_only$profit_margin_ratio,
  x = low_assets_only$revenue_c,
  c = 0,
  fuzzy = low_assets_only$ez,
  p = 2
)
summary(rd_frontier_revenue)
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.                 1478
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  751          727
## Eff. Number of Obs.             145          131
## Order est. (p)                    2            2
## Order bias  (q)                   3            3
## BW est. (h)                4117.115     4117.115
## BW bias (b)                9375.850     9375.850
## rho (h/b)                     0.439        0.439
## Unique Obs.                     736          714
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.360     0.157    -2.286     0.022    [-0.668 , -0.051]    
##         Robust         -         -    -2.244     0.025    [-0.681 , -0.046]    
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional     0.216     0.243     0.890     0.373    [-0.260 , 0.692]     
##         Robust         -         -     0.898     0.369    [-0.263 , 0.708]     
## =============================================================================

rdplot(
  y = low_assets_only$profit_margin_ratio,
  x = low_assets_only$revenue_c,
  c = 0,
  p = 2,
  hide = TRUE
)$rdplot +
  labs(x = "Revenue", y = "Profit/margin ratio", title = NULL) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal(base_family = "Inter")

And here’s the vertical part of the L, where everyone has roughly the same assets and everyone has less than $200,000 in revenue:

We can do it with OLS:

Code

low_revenue_only <- df_2021 |> 
  filter(revenue > 100000 & revenue < 200000) |> 
  filter(assets > 450000 & assets < 550000) |> 
  filter(
    profit_margin_ratio >= quantile(profit_margin_ratio, 0.025) &
      profit_margin_ratio <= quantile(profit_margin_ratio, 0.975)
  )

frontier_assets_ols <- feols(
  profit_margin_ratio ~ assets_c + I(assets_c^2) | 0 | ez ~ eligible,
  data = low_revenue_only,
  vcov = "HC1"
)
summary(frontier_assets_ols)
## TSLS estimation - Dep. Var.: profit_margin_ratio
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: profit_margin_ratio
## Observations: 1,294
## Standard-errors: Heteroskedasticity-robust 
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)    4.569e-02  2.116e-02  2.15915 0.031022 *  
## fit_ezTRUE     2.534e-02  8.718e-02  0.29069 0.771339    
## assets_c       1.888e-08  5.152e-07  0.03666 0.970764    
## I(assets_c^2) -1.740e-12  1.090e-11 -0.15966 0.873174    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.300436   Adj. R2: 0.003748
## F-test (1st stage), ezTRUE: stat = 90.0     , p < 2.2e-16 , on 1 and 1,290 DoF.
##                 Wu-Hausman: stat =  0.573593, p = 0.448973, on 1 and 1,289 DoF.

model_parameters(frontier_assets_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(1290) |     p
## -----------------------------------------------------------------
## fit ezTRUE |        0.03 | 0.09 | [-0.15, 0.20] |    0.29 | 0.771

Or with fancier rdrobust() stuff:

Code

rd_frontier_assets <- rdrobust(
  y = low_revenue_only$profit_margin_ratio,
  x = low_revenue_only$assets_c,
  c = 0,
  fuzzy = low_revenue_only$ez,
  p = 2
)
summary(rd_frontier_assets)
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.                 1294
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  708          586
## Eff. Number of Obs.              86           74
## Order est. (p)                    2            2
## Order bias  (q)                   3            3
## BW est. (h)                6367.454     6367.454
## BW bias (b)               21235.398    21235.398
## rho (h/b)                     0.300        0.300
## Unique Obs.                     699          584
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.615     0.150    -4.104     0.000    [-0.909 , -0.321]    
##         Robust         -         -    -4.096     0.000    [-0.912 , -0.322]    
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional     0.130     0.198     0.655     0.512    [-0.258 , 0.517]     
##         Robust         -         -     0.639     0.523    [-0.263 , 0.517]     
## =============================================================================

rdplot(
  y = low_revenue_only$profit_margin_ratio,
  x = low_revenue_only$assets_c,
  c = 0,
  p = 2,
  hide = TRUE
)$rdplot +
  labs(x = "Assets", y = "Profit/margin ratio", title = NULL) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal(base_family = "Inter")

Response surface RD

Another alternative is to look at the whole L simultaneously. Reardon and Robinson (2012) call this area the “response surface”. This is tricky to conceptualize because there are multiple dimensions—like the 3D plot earlier of treatment status, revenue and assets create a step or cliff along the L-shaped boundary, were the z-axis is the outcome (rather than probability of treatment that we saw earlier). The response surface RD estimates the height of this cliff.

Code

L_bandwidth_area <- df_2021 |>
  filter(revenue > 175000 & revenue < 225000) |>
  filter(assets > 450000 & assets < 550000) |>
  mutate(
    # Calculate distance to nearest threshold
    dist_revenue = abs(revenue_c) / 50000,
    dist_assets = abs(assets_c) / 100000,
    dist_to_frontier = pmin(dist_revenue, dist_assets),
    # Make triangular weights
    kernel_weight = pmax(0, 1 - dist_to_frontier)
  ) |>
  filter(
    profit_margin_ratio >= quantile(profit_margin_ratio, 0.025) &
      profit_margin_ratio <= quantile(profit_margin_ratio, 0.975)
  )

surface_ols <- feols(
  profit_margin_ratio ~
    revenue_c +
      I(revenue_c^2) +
      assets_c +
      I(assets_c^2) +
      revenue_c:assets_c |
      0 |
      ez ~
    eligible,
  data = L_bandwidth_area,
  weights = L_bandwidth_area$kernel_weight,
  vcov = "HC1"
)
summary(surface_ols)
## TSLS estimation - Dep. Var.: profit_margin_ratio
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: profit_margin_ratio
## Observations: 548
## Weights: L_bandwidth_area$kernel_weight
## Standard-errors: Heteroskedasticity-robust 
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.031e-02  2.267e-02  1.7783 0.075913 .  
## fit_ezTRUE          3.627e-02  1.508e-01  0.2405 0.810071    
## revenue_c           1.376e-06  9.878e-07  1.3927 0.164271    
## I(revenue_c^2)      2.070e-11  6.700e-11  0.3089 0.757532    
## assets_c           -5.063e-08  4.886e-07 -0.1036 0.917520    
## I(assets_c^2)       2.890e-11  1.500e-11  1.9230 0.055001 .  
## revenue_c:assets_c -4.420e-11  3.200e-11 -1.3823 0.167444    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.243479   Adj. R2: 0.009323
## F-test (1st stage), ezTRUE: stat = 73.4     , p < 2.2e-16 , on 1 and 541 DoF.
##                 Wu-Hausman: stat =  0.381221, p = 0.537211, on 1 and 540 DoF.

model_parameters(surface_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(541) |     p
## ----------------------------------------------------------------
## fit ezTRUE |        0.04 | 0.15 | [-0.26, 0.33] |   0.24 | 0.810

That’s really hard to visualize though!

Code

bucket_size <- 10000

heatmap_data_outcome <- df_2021 |>
  filter(
    profit_margin_ratio >= quantile(profit_margin_ratio, 0.025) &
      profit_margin_ratio <= quantile(profit_margin_ratio, 0.975)
  ) |> 
  mutate(
    revenue_bucket = floor(revenue / bucket_size) * bucket_size,
    assets_bucket = floor(assets / bucket_size) * bucket_size
  ) |>
  group_by(revenue_bucket, assets_bucket) |>
  summarize(avg_outcome = mean(profit_margin_ratio)) |>
  # geom_tile plots from the center of each bucket, so shift these over a bit
  mutate(
    revenue_bucket = revenue_bucket + (bucket_size / 2),
    assets_bucket = assets_bucket + (bucket_size / 2)
  )

ggplot(
  heatmap_data_outcome,
  aes(x = revenue_bucket, y = assets_bucket, fill = avg_outcome)
) +
  geom_tile() +
  geom_vline(xintercept = 200000, color = clrs[7]) +
  geom_hline(yintercept = 500000, color = clrs[7]) +
  scale_fill_scico(
    palette = "batlow",
    # trans = "sqrt",
    guide = guide_colorbar(barwidth = 15, barheight = 0.5)
  ) +
  scale_x_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  labs(
    x = "Total revenue",
    y = "Total assets",
    fill = "Average profit/margin ratio"
  ) +
  coord_cartesian(xlim = c(0, 300000), ylim = c(0, 750000)) +
  theme_minimal(base_family = "Inter") +
  theme(
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 1),
    legend.position = "bottom",
    legend.justification = "left",
    legend.title.position = "top",
    legend.margin = margin(l = 0, t = 0)
  )

We can do this with {rdmulti} too:

Code

df_truncated_outcome <- df_2021 |> 
  filter(
    profit_margin_ratio >= quantile(profit_margin_ratio, 0.025) &
      profit_margin_ratio <= quantile(profit_margin_ratio, 0.975)
  )

rd_full_L <- rdms(
  Y = df_truncated_outcome$profit_margin_ratio,
  X = df_truncated_outcome$revenue_c,
  C = 0,
  X2 = df_truncated_outcome$assets_c,
  C2 = 0,
  zvar = df_truncated_outcome$ez,
  fuzzy = df_truncated_outcome$ez
)
## 
## ================================================================================
## Cutoff           Coef.    P-value          95% CI          hl       hr        Nh        
## ================================================================================
## (0.00,0.00)      0.040   0.849     -0.130    0.158     89750.25489750.254 2861      
## ================================================================================

Binding score RD

The binding score identifies which standardized running variables is the “binding constraint” and is the minimum of the two standardized scores. It shows:

Which threshold (revenue or assets) is relatively closer to being crossed
How far away the observation is from treatment eligibility

A negative binding score means the observation is eligible for treatment (both revenue and assets are below their thresholds).

The distance score measures the minimum perpendicular distance to the treatment boundary, with sign determined by treatment status. it shows:

How close an observation is to changing treatment status
The sign indicates which side of the boundary the observation falls on

Code

binding_data <- df_2021 |>
  mutate(
    rev_std = (revenue - 200000) / sd(revenue),
    asset_std = (assets - 500000) / sd(assets),
    binding_score = pmin(rev_std, asset_std),
    distance_score = pmin(abs(rev_std), abs(asset_std)) * (2*eligible - 1)
  ) |> 
  filter(
    profit_margin_ratio >= quantile(profit_margin_ratio, 0.025) &
      profit_margin_ratio <= quantile(profit_margin_ratio, 0.975)
  )

Code

binding_data_window <- binding_data |>
  arrange(ez) |>
  filter(abs(binding_score) < 0.33)

ggplot(binding_data_window, aes(x = binding_score, y = profit_margin_ratio)) +
  geom_point(aes(color = form)) +
  geom_smooth(
    data = filter(binding_data_window, binding_score < 0),
    method = "lm"
  ) +
  geom_smooth(
    data = filter(binding_data_window, binding_score > 0),
    method = "lm"
  ) +
  geom_vline(xintercept = 0, color = "red") +
  scale_color_manual(
    values = c(clrs[5], clrs[6]),
    guide = guide_legend(override.aes = list(size = 2.5, alpha = 1))
  ) +
  coord_cartesian(ylim = c(-3, 1.1)) +
  labs(x = "Binding score", y = "Profit/margin ratio", color = "IRS form") +
  theme_minimal()

Code


rdplot(
  y = binding_data$profit_margin_ratio,
  x = binding_data$binding_score,
  c = 0,
  hide = TRUE
)$rdplot +
  labs(x = "Binding score", y = "Profit/margin ratio", title = NULL) +
  theme_minimal(base_family = "Inter")

Code


rdrobust(
  y = binding_data$profit_margin_ratio,
  x = binding_data$binding_score,
  c = 0,
  fuzzy = binding_data$ez
) |>
  summary()
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.               113814
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                99165        14649
## Eff. Number of Obs.            2807         2270
## Order est. (p)                    1            1
## Order bias  (q)                   2            2
## BW est. (h)                   0.138        0.138
## BW bias (b)                   0.457        0.457
## rho (h/b)                     0.301        0.301
## Unique Obs.                   87998        14496
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.005     0.003    -1.662     0.097    [-0.011 , 0.001]     
##         Robust         -         -    -1.825     0.068    [-0.011 , 0.000]     
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional     1.299     3.937     0.330     0.741    [-6.417 , 9.014]     
##         Robust         -         -     0.325     0.745    [-6.633 , 9.267]     
## =============================================================================

Leverage

That was all with profit margin ratio. What about leverage, or debt to income ratio?

Frontier RD

Code

low_assets_only <- df_2021 |>
  filter(assets > 300000 & assets < 500000) |>
  filter(revenue > 175000 & revenue < 225000) |>
  filter(
    debt_to_equity_ratio >= quantile(debt_to_equity_ratio, 0.025) &
      debt_to_equity_ratio <= quantile(debt_to_equity_ratio, 0.975)
  )

frontier_revenue_ols <- feols(
  debt_to_equity_ratio ~ revenue_c + I(revenue_c^2) | 0 | ez ~ eligible,
  data = low_assets_only,
  vcov = "HC1"
)
summary(frontier_revenue_ols)
## TSLS estimation - Dep. Var.: debt_to_equity_ratio
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: debt_to_equity_ratio
## Observations: 1,478
## Standard-errors: Heteroskedasticity-robust 
##                  Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)     2.213e-01  5.764e-02  3.83860 0.00012897 ***
## fit_ezTRUE      4.925e-01  2.885e-01  1.70705 0.08802334 .  
## revenue_c       3.373e-06  3.155e-06  1.06933 0.28509843    
## I(revenue_c^2) -1.510e-12  1.160e-10 -0.01308 0.98956712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.950956   Adj. R2: 0.008696
## F-test (1st stage), ezTRUE: stat = 94.2    , p < 2.2e-16 , on 1 and 1,474 DoF.
##                 Wu-Hausman: stat =  0.35962, p = 0.548809, on 1 and 1,473 DoF.

model_parameters(frontier_revenue_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(1474) |     p
## -----------------------------------------------------------------
## fit ezTRUE |        0.49 | 0.29 | [-0.07, 1.06] |    1.71 | 0.088

Code

rd_frontier_revenue <- rdrobust(
  y = low_assets_only$debt_to_equity_ratio,
  x = low_assets_only$revenue_c,
  c = 0,
  fuzzy = low_assets_only$ez,
  p = 2
)
summary(rd_frontier_revenue)
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.                 1478
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  751          727
## Eff. Number of Obs.             118          111
## Order est. (p)                    2            2
## Order bias  (q)                   3            3
## BW est. (h)                3338.732     3338.732
## BW bias (b)                6967.988     6967.988
## rho (h/b)                     0.479        0.479
## Unique Obs.                     736          712
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.304     0.183    -1.664     0.096    [-0.663 , 0.054]     
##         Robust         -         -    -1.608     0.108    [-0.681 , 0.067]     
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional     0.826     2.700     0.306     0.760    [-4.467 , 6.119]     
##         Robust         -         -     0.328     0.743    [-4.550 , 6.376]     
## =============================================================================

rdplot(
  y = low_assets_only$debt_to_equity_ratio,
  x = low_assets_only$revenue_c,
  c = 0,
  p = 2,
  hide = TRUE
)$rdplot +
  labs(x = "Revenue", y = "Debt/equity ratio", title = NULL) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal(base_family = "Inter")

Code

low_revenue_only <- df_2021 |> 
  filter(revenue > 100000 & revenue < 200000) |> 
  filter(assets > 450000 & assets < 550000) |> 
  filter(
    debt_to_equity_ratio >= quantile(debt_to_equity_ratio, 0.025) &
      debt_to_equity_ratio <= quantile(debt_to_equity_ratio, 0.975)
  )

frontier_assets_ols <- feols(
  debt_to_equity_ratio ~ assets_c + I(assets_c^2) | 0 | ez ~ eligible,
  data = low_revenue_only,
  vcov = "HC1"
)
summary(frontier_assets_ols)
## TSLS estimation - Dep. Var.: debt_to_equity_ratio
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: debt_to_equity_ratio
## Observations: 1,294
## Standard-errors: Heteroskedasticity-robust 
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.540e-01  6.516e-02  2.3630 0.018275 *  
## fit_ezTRUE    5.536e-01  2.783e-01  1.9893 0.046882 *  
## assets_c      2.241e-06  1.675e-06  1.3376 0.181259    
## I(assets_c^2) 1.910e-11  3.460e-11  0.5513 0.581554    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.894164   Adj. R2: -0.008185
## F-test (1st stage), ezTRUE: stat = 85.9    , p < 2.2e-16, on 1 and 1,290 DoF.
##                 Wu-Hausman: stat =  1.43033, p = 0.23193, on 1 and 1,289 DoF.

model_parameters(frontier_assets_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |       95% CI | t(1290) |     p
## ----------------------------------------------------------------
## fit ezTRUE |        0.55 | 0.28 | [0.01, 1.10] |    1.99 | 0.047

Code

rd_frontier_assets <- rdrobust(
  y = low_revenue_only$debt_to_equity_ratio,
  x = low_revenue_only$assets_c,
  c = 0,
  fuzzy = low_revenue_only$ez,
  p = 2
)
summary(rd_frontier_assets)
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.                 1294
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  704          590
## Eff. Number of Obs.              67           57
## Order est. (p)                    2            2
## Order bias  (q)                   3            3
## BW est. (h)                4870.155     4870.155
## BW bias (b)               19918.378    19918.378
## rho (h/b)                     0.245        0.245
## Unique Obs.                     695          588
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.520     0.181    -2.880     0.004    [-0.874 , -0.166]    
##         Robust         -         -    -2.878     0.004    [-0.876 , -0.166]    
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional     0.600     0.837     0.717     0.473    [-1.039 , 2.240]     
##         Robust         -         -     0.714     0.475    [-1.044 , 2.242]     
## =============================================================================

rdplot(
  y = low_revenue_only$debt_to_equity_ratio,
  x = low_revenue_only$assets_c,
  c = 0,
  p = 2,
  hide = TRUE
)$rdplot +
  labs(x = "Assets", y = "Debt/equity ratio", title = NULL) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal(base_family = "Inter")

Response surface RD

Code

L_bandwidth_area <- df_2021 |>
  filter(revenue > 175000 & revenue < 225000) |>
  filter(assets > 450000 & assets < 550000) |>
  mutate(
    # Calculate distance to nearest threshold
    dist_revenue = abs(revenue_c) / 50000,
    dist_assets = abs(assets_c) / 100000,
    dist_to_frontier = pmin(dist_revenue, dist_assets),
    # Make triangular weights
    kernel_weight = pmax(0, 1 - dist_to_frontier)
  ) |>
  filter(
    debt_to_equity_ratio >= quantile(debt_to_equity_ratio, 0.025) &
      debt_to_equity_ratio <= quantile(debt_to_equity_ratio, 0.975)
  )

surface_ols <- feols(
  debt_to_equity_ratio ~
    revenue_c +
      I(revenue_c^2) +
      assets_c +
      I(assets_c^2) +
      revenue_c:assets_c |
      0 |
      ez ~
    eligible,
  data = L_bandwidth_area,
  weights = L_bandwidth_area$kernel_weight,
  vcov = "HC1"
)
summary(surface_ols)
## TSLS estimation - Dep. Var.: debt_to_equity_ratio
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: debt_to_equity_ratio
## Observations: 548
## Weights: L_bandwidth_area$kernel_weight
## Standard-errors: Heteroskedasticity-robust 
##                      Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)         3.610e-01  1.056e-01  3.41715 0.00068041 ***
## fit_ezTRUE          5.199e-01  7.029e-01  0.73965 0.45983133    
## revenue_c           2.613e-06  3.815e-06  0.68478 0.49377508    
## I(revenue_c^2)      1.860e-11  2.377e-10  0.07823 0.93767396    
## assets_c            2.952e-06  2.365e-06  1.24832 0.21245399    
## I(assets_c^2)      -6.110e-11  6.640e-11 -0.91892 0.35854652    
## revenue_c:assets_c -9.290e-11  1.287e-10 -0.72166 0.47081583    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.988296   Adj. R2: -0.012626
## F-test (1st stage), ezTRUE: stat = 72.1     , p < 2.2e-16 , on 1 and 541 DoF.
##                 Wu-Hausman: stat =  0.505026, p = 0.477607, on 1 and 540 DoF.

model_parameters(surface_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(541) |     p
## ----------------------------------------------------------------
## fit ezTRUE |        0.52 | 0.70 | [-0.86, 1.90] |   0.74 | 0.460

Code

bucket_size <- 10000

heatmap_data_outcome <- df_2021 |>
  filter(
    debt_to_equity_ratio >= quantile(debt_to_equity_ratio, 0.025) &
      debt_to_equity_ratio <= quantile(debt_to_equity_ratio, 0.975)
  ) |> 
  mutate(
    revenue_bucket = floor(revenue / bucket_size) * bucket_size,
    assets_bucket = floor(assets / bucket_size) * bucket_size
  ) |>
  group_by(revenue_bucket, assets_bucket) |>
  summarize(avg_outcome = mean(debt_to_equity_ratio)) |>
  # geom_tile plots from the center of each bucket, so shift these over a bit
  mutate(
    revenue_bucket = revenue_bucket + (bucket_size / 2),
    assets_bucket = assets_bucket + (bucket_size / 2)
  )

ggplot(
  heatmap_data_outcome,
  aes(x = revenue_bucket, y = assets_bucket, fill = avg_outcome)
) +
  geom_tile() +
  geom_vline(xintercept = 200000, color = clrs[7]) +
  geom_hline(yintercept = 500000, color = clrs[7]) +
  scale_fill_scico(
    palette = "batlow",
    # trans = "sqrt",
    guide = guide_colorbar(barwidth = 15, barheight = 0.5)
  ) +
  scale_x_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  labs(
    x = "Total revenue",
    y = "Total assets",
    fill = "Average debt/equity ratio"
  ) +
  coord_cartesian(xlim = c(0, 300000), ylim = c(0, 750000)) +
  theme_minimal(base_family = "Inter") +
  theme(
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 1),
    legend.position = "bottom",
    legend.justification = "left",
    legend.title.position = "top",
    legend.margin = margin(l = 0, t = 0)
  )

Code

df_truncated_outcome <- df_2021 |> 
  filter(
    debt_to_equity_ratio >= quantile(debt_to_equity_ratio, 0.025) &
      debt_to_equity_ratio <= quantile(debt_to_equity_ratio, 0.975)
  )

rd_full_L <- rdms(
  Y = df_truncated_outcome$debt_to_equity_ratio,
  X = df_truncated_outcome$revenue_c,
  C = 0,
  X2 = df_truncated_outcome$assets_c,
  C2 = 0,
  zvar = df_truncated_outcome$ez,
  fuzzy = df_truncated_outcome$ez
)
## 
## ================================================================================
## Cutoff           Coef.    P-value          95% CI          hl       hr        Nh        
## ================================================================================
## (0.00,0.00)      0.230   0.007     0.082     0.529     331297.736331297.73637536     
## ================================================================================

Binding score RD

Code

binding_data <- df_2021 |>
  mutate(
    rev_std = (revenue - 200000) / sd(revenue),
    asset_std = (assets - 500000) / sd(assets),
    binding_score = pmin(rev_std, asset_std),
    distance_score = pmin(abs(rev_std), abs(asset_std)) * (2*eligible - 1)
  ) |> 
  filter(
    debt_to_equity_ratio >= quantile(debt_to_equity_ratio, 0.025) &
      debt_to_equity_ratio <= quantile(debt_to_equity_ratio, 0.975)
  )

Code

binding_data_window <- binding_data |>
  arrange(ez) |>
  filter(abs(binding_score) < 0.33)

rdplot(
  y = binding_data$debt_to_equity_ratio,
  x = binding_data$binding_score,
  c = 0,
  hide = TRUE
)$rdplot +
  labs(x = "Binding score", y = "Profit/margin ratio", title = NULL) +
  theme_minimal(base_family = "Inter")

Code


rdrobust(
  y = binding_data$debt_to_equity_ratio,
  x = binding_data$binding_score,
  c = 0,
  fuzzy = binding_data$ez
) |>
  summary()
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.               113814
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                99120        14694
## Eff. Number of Obs.            4106         3113
## Order est. (p)                    1            1
## Order bias  (q)                   2            2
## BW est. (h)                   0.191        0.191
## BW bias (b)                   0.568        0.568
## rho (h/b)                     0.336        0.336
## Unique Obs.                   88112        14544
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.001     0.003    -0.448     0.654    [-0.008 , 0.005]     
##         Robust         -         -    -0.849     0.396    [-0.009 , 0.004]     
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -2.371    37.013    -0.064     0.949   [-74.915 , 70.173]    
##         Robust         -         -    -0.027     0.979   [-76.475 , 74.427]    
## =============================================================================

Survival

How about survival? This is tricky since there aren’t a ton of closings:

Code

df_2021 |> 
  count(closed, eligible)
## # A tibble: 4 × 3
##   closed eligible     n
##   <lgl>  <lgl>    <int>
## 1 FALSE  FALSE    60347
## 2 FALSE  TRUE     59244
## 3 TRUE   FALSE      109
## 4 TRUE   TRUE       106

But we try regardless!

Frontier RD

Code

low_assets_only <- df_2021 |>
  filter(assets > 300000 & assets < 500000) |>
  filter(revenue > 175000 & revenue < 225000)

frontier_revenue_ols <- feols(
  closed ~ revenue_c + I(revenue_c^2) | 0 | ez ~ eligible,
  data = low_assets_only,
  vcov = "HC1"
)
summary(frontier_revenue_ols)
## TSLS estimation - Dep. Var.: closed
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: closed
## Observations: 1,556
## Standard-errors: Heteroskedasticity-robust 
##                  Estimate Std. Error  t value Pr(>|t|) 
## (Intercept)     1.957e-04  3.800e-04  0.51513  0.60654 
## fit_ezTRUE      7.274e-03  1.168e-02  0.62292  0.53343 
## revenue_c      -3.839e-09  1.733e-07 -0.02215  0.98233 
## I(revenue_c^2)  5.550e-12  8.390e-12  0.66108  0.50866 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.050765   Adj. R2: -0.006996
## F-test (1st stage), ezTRUE: stat = 95.6    , p < 2.2e-16 , on 1 and 1,552 DoF.
##                 Wu-Hausman: stat =  0.67414, p = 0.411738, on 1 and 1,551 DoF.

model_parameters(frontier_revenue_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(1552) |     p
## -----------------------------------------------------------------
## fit ezTRUE |    7.27e-03 | 0.01 | [-0.02, 0.03] |    0.62 | 0.533

Code

rd_frontier_revenue <- rdrobust(
  y = low_assets_only$closed,
  x = low_assets_only$revenue_c,
  c = 0,
  fuzzy = low_assets_only$ez,
  p = 2
)
summary(rd_frontier_revenue)
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.                 1556
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  795          761
## Eff. Number of Obs.             161          149
## Order est. (p)                    2            2
## Order bias  (q)                   3            3
## BW est. (h)                4391.989     4391.989
## BW bias (b)                4553.104     4553.104
## rho (h/b)                     0.965        0.965
## Unique Obs.                     778          746
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.322     0.148    -2.170     0.030    [-0.613 , -0.031]    
##         Robust         -         -    -1.920     0.055    [-0.815 , 0.008]     
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.000     0.000       NaN       NaN    [-0.000 , 0.000]     
##         Robust         -         -       NaN       NaN    [-0.000 , 0.000]     
## =============================================================================

rdplot(
  y = low_assets_only$closed,
  x = low_assets_only$revenue_c,
  c = 0,
  p = 2,
  hide = TRUE
)$rdplot +
  labs(x = "Revenue", y = "Closed", title = NULL) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal(base_family = "Inter")

Code

low_revenue_only <- df_2021 |> 
  filter(revenue > 100000 & revenue < 200000) |> 
  filter(assets > 450000 & assets < 550000)

frontier_assets_ols <- feols(
  closed ~ assets_c + I(assets_c^2) | 0 | ez ~ eligible,
  data = low_revenue_only,
  vcov = "HC1"
)
summary(frontier_assets_ols)
## TSLS estimation - Dep. Var.: closed
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: closed
## Observations: 1,364
## Standard-errors: Heteroskedasticity-robust 
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.114e-03  3.822e-03  0.8147 0.415390    
## fit_ezTRUE     1.090e-02  1.821e-02  0.5987 0.549451    
## assets_c       2.695e-08  7.876e-08  0.3422 0.732277    
## I(assets_c^2) -2.720e-12  1.570e-12 -1.7340 0.083146 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.054319   Adj. R2: -0.011341
## F-test (1st stage), ezTRUE: stat = 94.1   , p < 2.2e-16 , on 1 and 1,360 DoF.
##                 Wu-Hausman: stat =  1.1493, p = 0.283887, on 1 and 1,359 DoF.

model_parameters(frontier_assets_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(1360) |     p
## -----------------------------------------------------------------
## fit ezTRUE |        0.01 | 0.02 | [-0.02, 0.05] |    0.60 | 0.549

Code

rd_frontier_assets <- rdrobust(
  y = low_revenue_only$closed,
  x = low_revenue_only$assets_c,
  c = 0,
  fuzzy = low_revenue_only$ez,
  p = 2
)
summary(rd_frontier_assets)
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.                 1364
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  743          621
## Eff. Number of Obs.              73           63
## Order est. (p)                    2            2
## Order bias  (q)                   3            3
## BW est. (h)                5086.202     5086.202
## BW bias (b)               24266.437    24266.437
## rho (h/b)                     0.210        0.210
## Unique Obs.                     734          619
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.584     0.168    -3.482     0.000    [-0.912 , -0.255]    
##         Robust         -         -    -3.482     0.000    [-0.913 , -0.255]    
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.000     0.000       NaN       NaN    [-0.000 , 0.000]     
##         Robust         -         -    -0.556     0.578    [-0.000 , 0.000]     
## =============================================================================

rdplot(
  y = low_revenue_only$closed,
  x = low_revenue_only$assets_c,
  c = 0,
  p = 2,
  hide = TRUE
)$rdplot +
  labs(x = "Assets", y = "Closed", title = NULL) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal(base_family = "Inter")

Response surface RD

Code

L_bandwidth_area <- df_2021 |>
  filter(revenue > 175000 & revenue < 225000) |>
  filter(assets > 450000 & assets < 550000) |>
  mutate(
    # Calculate distance to nearest threshold
    dist_revenue = abs(revenue_c) / 50000,
    dist_assets = abs(assets_c) / 100000,
    dist_to_frontier = pmin(dist_revenue, dist_assets),
    # Make triangular weights
    kernel_weight = pmax(0, 1 - dist_to_frontier)
  )

surface_ols <- feols(
  closed ~
    revenue_c +
      I(revenue_c^2) +
      assets_c +
      I(assets_c^2) +
      revenue_c:assets_c |
      0 |
      ez ~
    eligible,
  data = L_bandwidth_area,
  weights = L_bandwidth_area$kernel_weight,
  vcov = "HC1"
)
summary(surface_ols)
## TSLS estimation - Dep. Var.: closed
##                   Endo.    : ez
##                   Instr.   : eligible
## Second stage: Dep. Var.: closed
## Observations: 578
## Weights: L_bandwidth_area$kernel_weight
## Standard-errors: Heteroskedasticity-robust 
##                      Estimate Std. Error  t value Pr(>|t|) 
## (Intercept)         2.243e-03  1.692e-03  1.32563  0.18549 
## fit_ezTRUE          6.387e-04  3.492e-02  0.01829  0.98541 
## revenue_c          -3.083e-07  3.247e-07 -0.94957  0.34273 
## I(revenue_c^2)      1.701e-11  2.170e-11  0.78391  0.43342 
## assets_c           -3.078e-08  3.284e-08 -0.93753  0.34888 
## I(assets_c^2)      -2.470e-12  2.270e-12 -1.09153  0.27550 
## revenue_c:assets_c -1.700e-12  1.530e-12 -1.11262  0.26634 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.054514   Adj. R2: -0.001699
## F-test (1st stage), ezTRUE: stat = 75.2     , p < 2.2e-16 , on 1 and 571 DoF.
##                 Wu-Hausman: stat =  0.082412, p = 0.774159, on 1 and 570 DoF.

model_parameters(surface_ols, verbose = FALSE, keep = "ez")
## # Fixed Effects
## 
## Parameter  | Coefficient |   SE |        95% CI | t(571) |     p
## ----------------------------------------------------------------
## fit ezTRUE |    6.39e-04 | 0.03 | [-0.07, 0.07] |   0.02 | 0.985

Code

bucket_size <- 10000

heatmap_data_outcome <- df_2021 |>
  mutate(
    revenue_bucket = floor(revenue / bucket_size) * bucket_size,
    assets_bucket = floor(assets / bucket_size) * bucket_size
  ) |>
  group_by(revenue_bucket, assets_bucket) |>
  summarize(avg_outcome = mean(closed == TRUE)) |>
  # geom_tile plots from the center of each bucket, so shift these over a bit
  mutate(
    revenue_bucket = revenue_bucket + (bucket_size / 2),
    assets_bucket = assets_bucket + (bucket_size / 2)
  )

ggplot(
  heatmap_data_outcome,
  aes(x = revenue_bucket, y = assets_bucket, fill = avg_outcome)
) +
  geom_tile() +
  geom_vline(xintercept = 200000, color = clrs[7]) +
  geom_hline(yintercept = 500000, color = clrs[7]) +
  scale_fill_scico(
    palette = "batlow",
    # trans = "sqrt",
    guide = guide_colorbar(barwidth = 15, barheight = 0.5)
  ) +
  scale_x_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  labs(
    x = "Total revenue",
    y = "Total assets",
    fill = "Proportion closed"
  ) +
  coord_cartesian(xlim = c(0, 300000), ylim = c(0, 750000)) +
  theme_minimal(base_family = "Inter") +
  theme(
    axis.title.x = element_text(hjust = 0),
    axis.title.y = element_text(hjust = 1),
    legend.position = "bottom",
    legend.justification = "left",
    legend.title.position = "top",
    legend.margin = margin(l = 0, t = 0)
  )

Code

df_truncated_outcome <- df_2021

rd_full_L <- rdms(
  Y = df_truncated_outcome$closed,
  X = df_truncated_outcome$revenue_c,
  C = 0,
  X2 = df_truncated_outcome$assets_c,
  C2 = 0,
  zvar = df_truncated_outcome$ez,
  fuzzy = df_truncated_outcome$ez
)
## 
## ================================================================================
## Cutoff           Coef.    P-value          95% CI          hl       hr        Nh        
## ================================================================================
## (0.00,0.00)      0.001   0.743     -0.006    0.009     216503.968216503.96817951     
## ================================================================================

Binding score RD

Code

binding_data <- df_2021 |>
  mutate(
    rev_std = (revenue - 200000) / sd(revenue),
    asset_std = (assets - 500000) / sd(assets),
    binding_score = pmin(rev_std, asset_std),
    distance_score = pmin(abs(rev_std), abs(asset_std)) * (2*eligible - 1)
  )

Code

binding_data_window <- binding_data |>
  arrange(ez) |>
  filter(abs(binding_score) < 0.33)

rdplot(
  y = binding_data$closed,
  x = binding_data$binding_score,
  c = 0,
  hide = TRUE
)$rdplot +
  labs(x = "Binding score", y = "Closed", title = NULL) +
  theme_minimal(base_family = "Inter")

Code


rdrobust(
  y = binding_data$closed,
  x = binding_data$binding_score,
  c = 0,
  fuzzy = binding_data$ez
) |>
  summary()
## Fuzzy RD estimates using local polynomial regression.
## 
## Number of Obs.               119806
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               104699        15107
## Eff. Number of Obs.            2919         2376
## Order est. (p)                    1            1
## Order bias  (q)                   2            2
## BW est. (h)                   0.138        0.138
## BW bias (b)                   0.447        0.447
## rho (h/b)                     0.309        0.309
## Unique Obs.                   92277        14949
## 
## First-stage estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.006     0.003    -1.926     0.054    [-0.013 , 0.000]     
##         Robust         -         -    -2.033     0.042    [-0.014 , -0.000]    
## =============================================================================
## 
## Treatment effect estimates.
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -0.247     0.399    -0.620     0.535    [-1.030 , 0.535]     
##         Robust         -         -    -0.542     0.588    [-1.032 , 0.585]     
## =============================================================================

References

Cattaneo, Matias D., Luke Keele, Rocío Titiunik, and Gonzalo Vazquez-Bare. 2016. “Interpreting Regression Discontinuity Designs with Multiple Cutoffs.” The Journal of Politics 78 (4): 1229–48. https://doi.org/10.1086/686802.

———. 2021. “Extrapolating Treatment Effects in Multi-Cutoff Regression Discontinuity Designs.” Journal of the American Statistical Association 116 (536): 1941–52. https://doi.org/10.1080/01621459.2020.1751646.

Matsudaira, Jordan D. 2008. “Mandatory Summer School and Student Achievement.” Journal of Econometrics 142 (2): 829–50. https://doi.org/10.1016/j.jeconom.2007.05.015.

Reardon, Sean F., and Joseph P. Robinson. 2012. “Regression Discontinuity Designs with Multiple Rating-Score Variables.” Journal of Research on Educational Effectiveness 5 (1): 83–104. https://doi.org/10.1080/19345747.2011.609583.