##------------------------------------------------------------------------## ## Script for web appendix on bootstrapping ## ## An R and S-PLUS Companion to Applied Regression ## ## John Fox ## ## Sage Publications, 2002 ## ##------------------------------------------------------------------------## # preliminaries options(width=65) options(digits=5) set.seed(170102) library(car) # for Duncan data set and data.ellipse library(MASS) data(Duncan) mod.duncan.hub <- rlm(prestige ~ income + education, data=Duncan) summary(mod.duncan.hub) # random-X resampling boot.huber <- function(data, indices, maxit=20){ data <- data[indices,] mod <- rlm(prestige ~ income + education, data=data, maxit=maxit) coefficients(mod) } library(boot) system.time(duncan.boot <- boot(Duncan, boot.huber, 1999, maxit=100)) duncan.boot duncan.array <- boot.array(duncan.boot) duncan.array[1:2,] plot(duncan.boot, index=2) # income coef. plot(duncan.boot, index=3) # education coef. data.ellipse(duncan.boot$t[,2], duncan.boot$t[,3], xlab='income coefficient', ylab='education coefficient', cex=.3, levels=c(.5, .95, .99), robust=T) # income coefficient boot.ci(duncan.boot, index=2, type=c("norm", "perc", "bca")) # education coefficient boot.ci(duncan.boot, index=3, type=c("norm", "perc", "bca")) par(mfcol=c(2,1), mar=par('mar')+.3) jack.after.boot(duncan.boot, index=2, main='(a) income coefficient') jack.after.boot(duncan.boot, index=3, main='(b) education coefficient') # fixed-X resampling fit <- fitted(mod.duncan.hub) e <- residuals(mod.duncan.hub) X <- model.matrix(mod.duncan.hub) boot.huber.fixed <- function(data, indices, maxit=20){ y <- fit + e[indices] mod <- rlm(y ~ X - 1, maxit=maxit) coefficients(mod) } duncan.fix.boot <- boot(Duncan, boot.huber.fixed, 1999, maxit=100) duncan.fix.boot par(mfcol=c(2,1)) jack.after.boot(duncan.fix.boot, index=2, main='(a) income coefficient') jack.after.boot(duncan.fix.boot, index=3, main='(b) education coefficient') plot(duncan.fix.boot, index=1, main='income coefficient') plot(duncan.fix.boot, index=2, main='education coefficient') data.ellipse(duncan.fix.boot$t[,2], duncan.fix.boot$t[,3], xlab='income coefficient', ylab='education coefficient', levels=c(.5, .9, .99), robust=T) boot.ci(duncan.fix.boot, index=2, type=c("norm", "basic", "perc", "bca")) boot.ci(duncan.fix.boot, index=3, type=c("norm", "basic", "perc", "bca")) # bootstrap hypothesis test b <- coefficients(mod.duncan.hub) sumry <- summary(mod.duncan.hub) # to obtain coef. cov. matrix V <- sumry$cov.unscaled * sumry$stddev^2 # coef. cov. matrix L <- c(0, 1, -1) # hypothesis matrix b1 <- b[2] - b[3] t <- b1/sqrt(L %*% V %*% L) t # test statistic boot.test <- function(data, indices, maxit=20){ data <- data[indices,] mod <- rlm(prestige ~ income + education, data=data, maxit=maxit) sumry <- summary(mod) V.star <- sumry$cov.unscaled * sumry$stddev^2 b.star <- coefficients(mod) b1.star <- b.star[2] - b.star[3] t <- (b1.star - b1)/sqrt(L %*% V.star %*% L) t } duncan.test.boot <- boot(Duncan, boot.test, 999, maxit=100) summary(duncan.test.boot$t) par(mfrow=c(1,2)) qqnorm(duncan.test.boot$t) qqline(duncan.test.boot$t) abline(0, 1, lty=2) hist(duncan.test.boot$t, breaks=50) box() abline(v=t, lwd=3) # 2-sided p-value 2 * (1 + sum(duncan.test.boot$t > as.vector(t)))/1000