We're going to continue with our look at the "incumbent advantage" in US elections, again using the Lee2008 data that comes with RDDtools. But this time we are going to explore some of our methodological tools for getting a better estimate of the potential causal effect.
pacman::p_load(
tidyverse, # Data manipulation,
ggplot2, # beautiful figures,
texreg,# Regression tables with nice layout.
devtools, # For installing RDDtools
Rtools, # For installing RDDtools
RDDtools # For running Regression Discontinuity analyses
)
# Import the data from Lee, David. (2008). "Randomized experiments from non-random selection in U.S. House elections." Journal of Econometrics.
data('Lee2008')
1A. Fit separate models to the Lee2008 data where (a) you assume the relationship is linear, as in Exercise 1; (b) you assume the relationship is a second order polynomial, i.e. a power of two; and (c) you assume the relationship is a third order polynomial, i.e. a power of three, and (d) you assume the relationship is a fourth order polynomial, i.e. a power of four. The slope should be the same on both sides of the threshold, for every model. Create a plot for each model, showing the line of best fit.
Lee2008_rdd <- RDDdata(
y = Lee2008$y,
x = Lee2008$x,
cutpoint = 0
)
reg_para1 <- RDDreg_lm(RDDobject = Lee2008_rdd, order = 1, slope='same')
coef1 <- round(summary(reg_para1)$coefficients[2, 1], 3)
se1 <- round(summary(reg_para1)$coefficients[2, 2], 3)
reg_para2 <- RDDreg_lm(RDDobject = Lee2008_rdd, order = 2, slope='same')
coef2 <- round(summary(reg_para2)$coefficients[2, 1], 3)
se2 <- round(summary(reg_para2)$coefficients[2, 2], 3)
reg_para3 <- RDDreg_lm(RDDobject = Lee2008_rdd, order = 3, slope='same')
coef3 <- round(summary(reg_para3)$coefficients[2, 1], 3)
se3 <- round(summary(reg_para3)$coefficients[2, 2], 3)
reg_para4 <- RDDreg_lm(RDDobject = Lee2008_rdd, order = 4, slope='same')
coef4 <- round(summary(reg_para4)$coefficients[2, 1], 3)
se4 <- round(summary(reg_para4)$coefficients[2, 2], 3)
plot(
reg_para1,
xlab='Democratic Win Margin in time t\n',
ylab='Democratic Vote Share in time t+1'
)
plot(
reg_para2,
xlab='Democratic Win Margin in time t\n',
ylab='Democratic Vote Share in time t+1'
)
plot(
reg_para3,
xlab='Democratic Win Margin in time t\n',
ylab='Democratic Vote Share in time t+1'
)
plot(
reg_para4,
xlab='Democratic Win Margin in time t\n',
ylab='Democratic Vote Share in time t+1'
)
1B. Every model provides a different estimate for the effect of being the winner of the previous election. Among the models that assume a non-linear relationship, the largest estimate is (3 decimal places) and the smallest is (3 decimal places) .
summary(reg_para2)
#
# Call:
# lm(formula = y ~ ., data = dat_step1, weights = weights)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.8975 -0.0614 0.0026 0.0718 0.8559
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.43521 0.00371 117.4 < 2e-16 ***
# D 0.11977 0.00574 20.9 < 2e-16 ***
# x 0.31681 0.00694 45.6 < 2e-16 ***
# `x^2` 0.02574 0.00660 3.9 9.7e-05 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.138 on 6554 degrees of freedom
# Multiple R-squared: 0.671, Adjusted R-squared: 0.671
# F-statistic: 4.45e+03 on 3 and 6554 DF, p-value: <2e-16
summary(reg_para3)
#
# Call:
# lm(formula = y ~ ., data = dat_step1, weights = weights)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.8717 -0.0596 0.0026 0.0685 0.7792
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.45887 0.00414 110.74 < 2e-16 ***
# D 0.06445 0.00725 8.89 < 2e-16 ***
# x 0.46183 0.01369 33.73 < 2e-16 ***
# `x^2` 0.05517 0.00696 7.93 2.5e-15 ***
# `x^3` -0.16864 0.01378 -12.24 < 2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.137 on 6553 degrees of freedom
# Multiple R-squared: 0.678, Adjusted R-squared: 0.678
# F-statistic: 3.45e+03 on 4 and 6553 DF, p-value: <2e-16
summary(reg_para4)
#
# Call:
# lm(formula = y ~ ., data = dat_step1, weights = weights)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.8808 -0.0610 0.0020 0.0684 0.7481
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.47125 0.00461 102.32 < 2e-16 ***
# D 0.05797 0.00731 7.93 2.6e-15 ***
# x 0.48356 0.01411 34.26 < 2e-16 ***
# `x^2` -0.07415 0.02235 -3.32 0.00091 ***
# `x^3` -0.19807 0.01457 -13.60 < 2e-16 ***
# `x^4` 0.14028 0.02305 6.09 1.2e-09 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.136 on 6552 degrees of freedom
# Multiple R-squared: 0.68, Adjusted R-squared: 0.68
# F-statistic: 2.79e+03 on 5 and 6552 DF, p-value: <2e-16
bandwidth <- RDDbw_IK(Lee2008_rdd)
reg_nonparam <- RDDreg_np(RDDobject = Lee2008_rdd, bw = bandwidth, slope='same')
coef_nonparam <- summary(reg_nonparam)$coefMat[1]
se_nonparam <- summary(reg_nonparam)$coefMat[2]
summary(reg_nonparam)
# ### RDD regression: nonparametric local linear###
# Bandwidth: 0.294
# Number of obs: 3200 (left: 1594, right: 1606)
#
# Weighted Residuals:
# Min 1Q Median 3Q Max
# -0.9535 -0.0623 0.0008 0.0514 0.9620
#
# Coefficient:
# Estimate Std. Error z value Pr(>|z|)
# D 0.07978 0.00946 8.43 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Local R squared: 0.356