Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
224 views
in Technique[技术] by (71.8m points)

r - Correcting the standard errors, when the second stage is a tobit regression

I am trying to implement this bootstrap method for my data, but with more variables, to correct my standard errors. Somewhere I am messing up though (the original code is posted at the bottom)..

Cross-posted at CrossValidated

set.seed(2)

a    <- 2    # structural parameter of interest
b    <- 1    # strength of instrument
rho  <- 0.5  # degree of endogeneity

N    <- 1000
z    <- rnorm(N)
res1 <- rnorm(N)
res2 <- res1*rho + sqrt(1-rho*rho)*rnorm(N)
x    <- z*b + res1
ys   <- x*a + res2
d    <- (ys>0) #dummy variable
y    <- round(10-(d*ys))
random_variable <- rnorm(100, mean = 0, sd = 1)

library(data.table)
DT_1 <- data.frame(y,x,z, random_variable)
DT_2 <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50), year = c(1995, 1995, 1995, 1995, 1995, 
1995, 1995, 1995, 1995, 1995, 2000, 2000, 2000, 2000, 2000, 2000, 
2000, 2000, 2000, 2000, 2005, 2005, 2005, 2005, 2005, 2005, 2005, 
2005, 2005, 2005, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 
2010, 2010, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015), Group = c("A", "A", "A", "A", "B", "B", "B", "B", "C", 
"C", "A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "A", "A", 
"A", "A", "B", "B", "B", "B", "C", "C", "A", "A", "A", "A", "B", 
"B", "B", "B", "C", "C", "A", "A", "A", "A", "B", "B", "B", "B", 
"C", "C"), event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), win_or_lose = c(-1, 
-1, -1, -1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, 0, 0, 
-1, -1, -1, -1, 1, 1, 1, 1, 0, 0)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))
DT_1 <- setDT(DT_1)
DT_2 <- setDT(DT_2)
DT_2 <- rbind(DT_2 , DT_2 [rep(1:50, 19), ])
sandbox <- cbind(DT_1, DT_2)

I am trying to implement the bootstrap method as follows:

# z = exogenous (IV)
# random_variable  = exogenous (IV)
# year = exogenous

The first stage is an ols.

ols <- lm(x ~ z + random_variable + year, data=sandbox);print(summary(ols))

The second stage is a tobit:

inconsistent.tobit <- censReg(y~x)
summary(inconsistent.tobit)

reduced.form <- ols
summary(reduced.form)


consistent.tobit <- censReg(y~fitted(reduced.form)+residuals(reduced.form))
summary(consistent.tobit)


# I'd like bootstrapped standard errors, please!

tobit_2siv_coef <- function(data, indices){
  d <- data[indices,]
  reduced.form <- ols
  consistent.tobit <- censReg(d[,"y"]~fitted(reduced.form)+residuals(reduced.form))
  return(summary(consistent.tobit)$estimate["fitted(reduced.form)",1])
}

boot.results <- boot(data=sandbox, statistic=tobit_2siv_coef, R=100)
boot.results

In my actual data, I did not get the same estimate, in the example data I get the error:

Error in model.frame.default(formula = d[, "y"] ~ fitted(reduced.form) +  : 
  invalid type (list) for variable 'd[, "y"]'

Original example (link)

require(censReg)
require(boot)

a    <- 2    # structural parameter of interest
b    <- 1    # strength of instrument
rho  <- 0.5  # degree of endogeneity

N    <- 1000
z    <- rnorm(N)
res1 <- rnorm(N)
res2 <- res1*rho + sqrt(1-rho*rho)*rnorm(N)
x    <- z*b + res1
ys   <- x*a + res2
d    <- (ys>0) #dummy variable
y    <- d*ys

inconsistent.tobit <- censReg(y~x)
summary(inconsistent.tobit)

reduced.form <- lm(x~z)
summary(reduced.form)


consistent.tobit <- censReg(y~fitted(reduced.form)+residuals(reduced.form))
summary(consistent.tobit)


# I'd like bootstrapped standard errors, please!
my.data <- data.frame(y,x,z)
tobit_2siv_coef <- function(data,indices){
  d <- data[indices,]
  reduced.form <- lm(x~z,data=d)
  consistent.tobit <- censReg(d[,"y"]~fitted(reduced.form)+residuals(reduced.form))
  return(summary(consistent.tobit)$estimate["fitted(reduced.form)",1])
}

boot.results <- boot(data=my.data,statistic=tobit_2siv_coef,R=100)
boot.results

EDIT - AER::TOBIT version

  reduced.form <- lm(x ~ z + random_variable + year, data=x)
  consistent.tobit <<- AER::tobit(y ~ fitted(reduced.form) + residuals(reduced.form), left=0, right=100, data=dataset)

  bootstrap_se <- function(x) {
    reduced.form <- first_stage_ols
    AER::tobit(y ~ fitted(reduced.form) + residuals(reduced.form), left=0, right=100, data=dataset)$estimate
    # second_stage_tobit_b$estimate
  }
  library(AER::tobit)
  R <- 100
  res <- t(replicate(R, bootstrap_se(dataset[sample(nrow(dataset), nrow(dataset), replace=T), ])))
  # To scrape out a summary, the matrixStats package is most convenient.
  library(matrixStats)
  b <- consistent.tobit$coefficients
  b <- append(b, consistent.tobit[["icoef"]][["Log(scale)"]])
  SE <- colSds(res)
  z <- consistent.tobit$coefficients/SE
  p <- 2 * pt(-abs(z), df = Inf)
  ci <- colQuantiles(res, probs=c(.025, .975))
  res <<- signif(cbind(b, SE, z, p, ci), 4)

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)

I don't use boot, but basically you could put the code into a FUNction and sample the observations using sample with replace=TRUE in a `replicate.

library(AER)
reduced.form <- lm(x ~ z + random_variable + year, data=sandbox)
consistent.tobit <- tobit(y ~ fitted(reduced.form) + residuals(reduced.form), 
                          left=0, right=100, data=sandbox)

FUN <- function(x) {
  reduced.form <- lm(x ~ z + random_variable + year, data=x)
  fit <- tobit(y ~ fitted(reduced.form) + residuals(reduced.form), data=x)
  c(fit$coefficients, logSigma=log(fit$scale))
}

set.seed(42)
R <- 200
bs <- t(replicate(R, FUN(sandbox[sample(nrow(sandbox), nrow(sandbox), replace=T), ])))

To scrape out a summary, the matrixStats package is most convenient.

library(matrixStats)
b <- c(consistent.tobit$coefficients, logSigma=log(consistent.tobit$scale))
SE <- colSds(bs)
z <- b/SE
p <- 2 * pt(-abs(z), df = Inf)
ci <- colQuantiles(bs, probs=c(.025, .975))
res <- signif(cbind(b, SE, z, p, ci), 4)
res
#                               b      SE      z          p    2.5%   97.5%
#   (Intercept)            8.6470 0.04127 209.50  0.000e+00  8.5630  8.7190
# fitted(reduced.form)    -1.0600 0.04261 -24.86 1.890e-136 -1.1400 -0.9772
# residuals(reduced.form) -1.2960 0.05493 -23.59 4.958e-123 -1.3830 -1.1740
# logSigma                 0.1589 0.02665   5.96  2.518e-09  0.1067  0.2049

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...