county year crmrte prbarr
1 1 81 0.0398849 0.289696
2 1 82 0.0383449 0.338111
3 1 83 0.0303048 0.330449
4 1 84 0.0347259 0.362525
5 1 85 0.0365730 0.325395
6 1 86 0.0347524 0.326062
Panel data are additionally important because:
Estimate Std. Error t value Pr(>|t|)
0.0648010359 0.0159005783 4.0753886238 0.0003839695
LocalStuff — fixed county characteristics (e.g., geography, institutions).
LawAndOrder and CivilRights — political and institutional traits that may change slowly, but are mostly fixed over short horizons.
Police budgets and Poverty levels — vary within counties over time, reflecting local and national (macroeconomic) fluctuations.
Can panel data allow us to separate time-invariant county traits from time-varying factors — helping us get closer to a causal interpretation?
\[
y_{it} = \beta x_{it} + \underbrace{c_i + u_{it}}_{\text{error}}
\]

- \(c_i\) may correlate with \(x_{it}\) → OLS is biased because \(E(c_i + u_{it} \mid x_{it}) \neq 0\).
- Simple OLS mixes within and between variation; it is biased if \(Cov(x_{it}, c_i) \neq 0\).
- Between: differences across \(i\) (time-invariant) → drives pooled OLS bias
- Within: changes over \(t\) within \(i\) → identifies \(\beta\) under FE assumptions
# Keep only counties 1, 3, 23 and 145 of crime4 for the small illustration.
css <- filter(crime4, county %in% c(1, 3, 23, 145))

# Pooled regression of the crime rate on the arrest probability; the county
# identifier is not used here.
m_cs <- lm(crmrte ~ prbarr, data = css)

# Slope on prbarr from the pooled fit.
coef(m_cs)["prbarr"]
prbarr
0.06480104
\[ y_{it} = \beta x_{it} + \underbrace{c_i + u_{it}}_{\text{error}} \]
Call:
lm(formula = crmrte ~ prbarr + factor(county), data = css)
Residuals:
Min 1Q Median 3Q Max
-0.0052637 -0.0014554 0.0000986 0.0009928 0.0083848
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.044945 0.004556 9.866 9.85e-10 ***
prbarr -0.028376 0.013629 -2.082 0.04865 *
factor(county)3 -0.024996 0.002544 -9.824 1.07e-09 ***
factor(county)23 -0.008498 0.001658 -5.126 3.41e-05 ***
factor(county)145 -0.006500 0.001596 -4.072 0.00047 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.002912 on 23 degrees of freedom
Multiple R-squared: 0.8926, Adjusted R-squared: 0.8739
F-statistic: 47.78 on 4 and 23 DF, p-value: 8.115e-11
library(modelsummary)
# Build county-demeaned versions of the crime rate and the arrest probability.
# The county means are kept as columns (mean_crime, mean_prob). The result is
# ungrouped at the end so later steps do not silently inherit the county
# grouping, matching the other demeaning pipeline in this file.
cdata <- css %>%
  group_by(county) %>%
  mutate(
    mean_crime = mean(crmrte),
    mean_prob = mean(prbarr),
    demeaned_crime = crmrte - mean_crime,
    demeaned_prob = prbarr - mean_prob
  ) %>%
  ungroup()
# Collect three specifications in a named list so they can be tabulated
# side by side.
mod <- list()
# County dummies entered via factor(county).
mod$dummy <- lm(crmrte ~ prbarr + factor(county), css)
# Same regression without any county terms.
mod$xsect <- lm(crmrte ~ prbarr, data = cdata)
# Regression on the county-demeaned variables.
mod$demeaned <- lm(demeaned_crime ~ demeaned_prob, data = cdata)

# Regular expression of goodness-of-fit rows to omit from the table.
# NOTE(review): "R2 Adj. " ends with a space, so it does not match the
# "R2 Adj." row, which still appears in the rendered table -- confirm intent.
gom <- "DF|Deviance|AIC|BIC|p.value|se_type|R2 Adj. |statistic|Log.Lik.|Num.Obs."

modelsummary::modelsummary(
  mod[c("xsect", "dummy", "demeaned")],
  statistic = "std.error",
  title = "Comparing (biased) cross-sectional OLS, dummy variable and manual demeaning panel regressions",
  coef_omit = "factor", # hide the factor(county) coefficients
  gof_omit = gom
)
| xsect | dummy | demeaned | |
|---|---|---|---|
| (Intercept) | 0.009 | 0.045 | 0.000 |
| (0.005) | (0.005) | (0.001) | |
| prbarr | 0.065 | -0.028 | |
| (0.016) | (0.014) | ||
| demeaned_prob | -0.028 | ||
| (0.013) | |||
| R2 | 0.390 | 0.893 | 0.159 |
| R2 Adj. | 0.366 | 0.874 | 0.126 |
| F | 16.609 | 47.777 | 4.900 |
| RMSE | 0.01 | 0.00 | 0.00 |
# Manual within transformation: copy the outcome and regressor to y and x,
# then subtract each county's own mean from both.
cdata <- css %>%
  mutate(y = crmrte) %>%
  mutate(x = prbarr) %>%
  group_by(county) %>%
  mutate(
    y_dm = y - mean(y),
    x_dm = x - mean(x)
  ) %>%
  ungroup()

# OLS on the demeaned variables.
m_within_manual <- lm(y_dm ~ x_dm, data = cdata)
summary(m_within_manual)
Call:
lm(formula = y_dm ~ x_dm, data = cdata)
Residuals:
Min 1Q Median 3Q Max
-0.0052637 -0.0014554 0.0000986 0.0009928 0.0083848
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.190e-18 5.175e-04 0.000 1.0000
x_dm -2.838e-02 1.282e-02 -2.214 0.0358 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.002739 on 26 degrees of freedom
Multiple R-squared: 0.1586, Adjusted R-squared: 0.1262
F-statistic: 4.9 on 1 and 26 DF, p-value: 0.03583
# First-difference estimator: sort by county and year, then difference y and x
# against the previous row within each county.
dt <- as.data.table(css)[order(county, year)]
dt[, `:=`(y = crmrte, x = prbarr)]
dt[, c("dy", "dx") := list(y - shift(y), x - shift(x)), by = county]

# The first row of every county has no previous row, so dy is NA there and the
# row is dropped before fitting.
m_fd <- lm(dy ~ dx, data = dt[!is.na(dy)])
summary(m_fd)
Call:
lm(formula = dy ~ dx, data = dt[!is.na(dy)])
Residuals:
Min 1Q Median 3Q Max
-0.0089587 -0.0015629 -0.0002049 0.0019199 0.0103419
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.0002985 0.0008554 -0.349 0.730
dx -0.0096765 0.0196879 -0.491 0.628
Residual standard error: 0.00413 on 22 degrees of freedom
Multiple R-squared: 0.01086, Adjusted R-squared: -0.0341
F-statistic: 0.2416 on 1 and 22 DF, p-value: 0.6279
fixest.fixest::feols (recommended)
# County fixed effects via fixest, standard errors clustered by county.
m_fe_1w <- feols(
  crmrte ~ prbarr | county,
  data = css,
  cluster = ~ county
)

# Put the four fits next to each other in one table.
modelsummary::modelsummary(list(
  "Pooled (lm)" = m_cs,
  "LSDV (lm)" = m_lsdv,
  "Within (manual lm)" = m_within_manual,
  "FE (fixest)" = m_fe_1w
))
| Pooled (lm) | LSDV (lm) | Within (manual lm) | FE (fixest) | |
|---|---|---|---|---|
| (Intercept) | 0.009 | 0.045 | 0.000 | |
| (0.005) | (0.005) | (0.001) | ||
| prbarr | 0.065 | -0.028 | -0.028 | |
| (0.016) | (0.014) | (0.005) | ||
| factor(county)3 | -0.025 | |||
| (0.003) | ||||
| factor(county)23 | -0.008 | |||
| (0.002) | ||||
| factor(county)145 | -0.007 | |||
| (0.002) | ||||
| x_dm | -0.028 | |||
| (0.013) | ||||
| Num.Obs. | 28 | 28 | 28 | 28 |
| R2 | 0.390 | 0.893 | 0.159 | 0.893 |
| R2 Adj. | 0.366 | 0.874 | 0.126 | 0.874 |
| R2 Within | 0.159 | |||
| R2 Within Adj. | 0.122 | |||
| AIC | -198.4 | -241.0 | -247.0 | -243.0 |
| BIC | -194.4 | -233.0 | -243.0 | -236.4 |
| Log.Lik. | 102.197 | 126.516 | 126.516 | |
| F | 16.609 | 47.777 | 4.900 | |
| RMSE | 0.01 | 0.00 | 0.00 | 0.00 |
| Std.Errors | by: county | |||
| FE: county | X |
y ~ x | FE1 + FE2 + ...

Pooled OLS: Mixing Within and Between Variation
With multiple time periods \(t\) per unit \(\color{blue}{i}\), the fixed-effects (within) estimator is: \(\hat{\beta}_{FE} = \frac{\sum_{\color{blue}{i}} \sum_{\color{red}{t}} \left(x_{\color{blue}{i}\color{red}{t}} - \bar x_{\color{blue}{i}}\right)\left(y_{\color{blue}{i}\color{red}{t}} - \bar y_{\color{blue}{i}}\right)}{\sum_{\color{blue}{i}} \sum_{\color{red}{t}} \left(x_{\color{blue}{i}\color{red}{t}} - \bar x_{\color{blue}{i}}\right)^2}\)

The within transformation centers the data!
By time-demeaning \(y\) and \(x\), we project out the fixed factors related to county
Only within county variation is left.
Made by Nick C Huntington-Klein. 🙏
# County and year fixed effects on crime4, standard errors clustered by county.
m_twoway <- feols(
  crmrte ~ prbarr | county + year,
  data = crime4,
  cluster = ~ county
)

# NOTE(review): m_fe_1w is fit on css while m_twoway is fit on crime4, so the
# two columns use different samples (28 vs 630 observations in the printed
# output) -- confirm this comparison is intended.
etable(m_fe_1w, m_twoway, se = "cluster")
m_fe_1w m_twoway
Dependent Var.: crmrte crmrte
prbarr -0.0284* (0.0052) -0.0011 (0.0026)
Fixed-Effects: ----------------- ----------------
county Yes Yes
year No Yes
_______________ _________________ ________________
S.E.: Clustered by: county by: county
Observations 28 630
R2 0.89258 0.87735
Within R2 0.15859 0.00034
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Three estimators on crime4: pooled OLS, county fixed effects, and county +
# year fixed effects.
m_pooled <- lm(crmrte ~ prbarr, data = crime4)

# Clustering is stated explicitly so that m_fe agrees with m_twfe and with the
# "by: county" standard errors reported in the table, instead of depending on
# the package's default variance estimator.
m_fe <- feols(crmrte ~ prbarr | county, data = crime4, cluster = ~ county)
m_twfe <- feols(
  crmrte ~ prbarr | county + year,
  data = crime4,
  cluster = ~ county
)

modelsummary::modelsummary(list(
  "Pooled" = m_pooled,
  "FE" = m_fe,
  "TWFE (fixest)" = m_twfe
))
| Pooled | FE | TWFE (fixest) | |
|---|---|---|---|
| (Intercept) | 0.043 | ||
| (0.001) | |||
| prbarr | -0.038 | -0.002 | -0.001 |
| (0.004) | (0.003) | (0.003) | |
| Num.Obs. | 630 | 630 | 630 |
| R2 | 0.129 | 0.871 | 0.877 |
| R2 Adj. | 0.127 | 0.849 | 0.855 |
| R2 Within | 0.001 | 0.000 | |
| R2 Within Adj. | -0.001 | -0.002 | |
| AIC | -3347.3 | -4373.6 | -4394.6 |
| BIC | -3334.0 | -3969.1 | -3963.4 |
| Log.Lik. | 1676.651 | ||
| F | 92.646 | ||
| RMSE | 0.02 | 0.01 | 0.01 |
| Std.Errors | by: county | by: county | |
| FE: county | X | X | |
| FE: year | X |