# Regression Diagnostics, 2nd Edition # John Fox # last modified: 2019-06-18 # R script for Ch. 7 library(car) library(MASS) # VIFs for Duncan's regression Duncan <- read.table("Duncan.txt", header=TRUE) # assumes that Duncan.txt is in the current directory, adjust as necessary mod.duncan <- lm(prestige ~ income + education, data=Duncan) S(mod.duncan) with(Duncan, cor(cbind(income, education))) sqrt(vif(mod.duncan)) mod.duncan.1 <- update(mod.duncan, subset=-c(6, 16)) # remove minister, conductor S(mod.duncan.1) with(Duncan[-c(6, 16), ], cor(cbind(income, education))) sqrt(vif(mod.duncan.1)) # Fig. 7.1: data and confidence ellipses for regressions on contrived data set.seed(13221) # for reproducibility X1 <- mvrnorm(200, mu=c(0, 0), Sigma=matrix(c(1, 0, 0, 1), 2, 2)) X2 <- mvrnorm(200, mu=c(0, 0), Sigma=matrix(c(1, 0.95, 0.95, 1), 2, 2)) cor(X1) cor(X2) set.seed(123) y1 <- X1 %*% c(2, 3) + rnorm(200, sd=10) mod1 <- lm(y1 ~ X1) brief(mod1) set.seed(123) y2 <- X2 %*% c(2, 3) + rnorm(200, sd=10) mod2 <- lm(y2 ~ X2) brief(mod2) par <- par(mfrow=c(2, 2)) dataEllipse(X1, levels=0.95, xlim=c(-3, 3), ylim=c(-3, 3), col="black", center.cex=2, main="(a)", xlab=expression(x[1]), ylab=expression(x[2]), las=1, segments=1000) dataEllipse(X2, levels=0.95, xlim=c(-3, 3), ylim=c(-3, 3), col="black", center.cex=2, main="(b)", xlab=expression(x[1]), ylab=expression(x[2]), las=1, segments=1000) confidenceEllipse(mod1, col="black",xlim=c(-5, 10), ylim=c(-5, 10), fill=TRUE, center.cex=2, levels=c(0.85, 0.95), fill.alpha=0.2, main="(c)", xlab=expression(beta[1]), ylab=expression(beta[2]), las=1, segments=1000) abline(h=0, v=0) points(0, 0, pch=15, cex=2) confidenceEllipse(mod2, col="black", xlim=c(-5, 10), ylim=c(-5, 10), fill=TRUE, center.cex=2, levels=c(0.85, 0.95), fill.alpha=0.2, main="(d)", xlab=expression(beta[1]), ylab=expression(beta[2]), las=1, segments=1000) abline(h=0, v=0) points(0, 0, pch=15, cex=2) par(par) # Fig. 7.2: marginal vs added-variable plots for the contrived regressions # (note: the figure in the text uses some custom code not in mcPlot() ) par <- par(mfrow=c(1, 2)) mcPlot(mod1, "X11", col.cond="black", col.marg="black", col.arrows=gray(0.5), pch=c(1, 16), grid=FALSE) mcPlot(mod2, "X21", col.cond="black", col.marg="black", col.arrows=gray(0.5), pch=c(1, 16), grid=FALSE) par(par) # warnings are innocuous # GVIFs for the regression of log(infant mortality) on for the CIA data CIA <- read.table("CIA.txt", header=TRUE) # assumes that CIA.txt is in the current directory, adjust as necessary mod.cia <- lm(log(infant) ~ log(gdp) + poly(health, 2, raw=TRUE) + gini, data=CIA) S(mod.cia) vif(mod.cia)