##------------------------------------------------------------------------## ## Script for web appendix on nonparametric regression ## ## An R and S-PLUS Companion to Applied Regression ## ## John Fox ## ## Sage Publications, 2002 ## ##------------------------------------------------------------------------## # preliminaries options(width=65) options(digits=5) library(car) # for data sets ## Local polynomial regression # How it works tricube <- function(z) { ifelse (abs(z) < 1, (1 - (abs(z))^3)^3, 0) } data(Prestige) attach(Prestige) par(mfrow=c(2,2)) plot(income, prestige, xlab="Average Income", ylab="Prestige", type='n', main="(a)") ord <- order(income) inc <- income[ord] pre <- prestige[ord] x0 <- inc[80] diffs <- abs(inc - x0) which.diff <- sort(diffs)[50] abline(v=c(x0-which.diff, x0+which.diff), lty=2) abline(v=x0) points(inc[diffs > which.diff], pre[diffs > which.diff], pch=16, col=gray(.75)) points(inc[diffs <= which.diff], pre[diffs <= which.diff]) x.n <- inc[diffs <= which.diff] y.n <- pre[diffs <= which.diff] plot(range(income), c(0,1), xlab="Average Income", ylab="Tricube Weight", type='n', main="(b)") abline(v=c(x0-which.diff, x0+which.diff), lty=2) abline(v=x0) xwts <- seq(x0-which.diff, x0+which.diff, len=250) lines(xwts, tricube((xwts-x0)/which.diff), lty=1, lwd=2) points(x.n, tricube((x.n - x0)/which.diff)) plot(income, prestige, xlab="Average Income", ylab="Prestige", type='n', main="(c)") abline(v=c(x0-which.diff, x0+which.diff), lty=2) abline(v=x0) points(x.n, y.n) mod <- lm(y.n ~ x.n, weights=tricube((x.n-x0)/which.diff)) reg.line(mod, lwd=2, col=1) points(x0, predict(mod, data.frame(x.n=x0)), pch=16, cex=1.8) plot(income, prestige, xlab="Average Income", ylab="Prestige", main="(d)") lines(lowess(income, prestige, f=0.5, iter=0), lwd=2) # multiple regression mod.lo <- loess(prestige ~ income + education, span=.5, degree=1) summary(mod.lo) inc <- seq(min(income), max(income), len=25) ed <- seq(min(education), max(education), len=25) newdata <- expand.grid(income=inc,education=ed) fit.prestige <- matrix(predict(mod.lo, newdata), 25, 25) persp(inc, ed, fit.prestige, theta=45, phi=30, ticktype='detailed', xlab='Income', ylab='Education', zlab='Prestige', expand=2/3, shade=0.5) mod.lo.inc <- loess(prestige ~ income, span=.7, degree=1) mod.lo.ed <- loess(prestige ~ education, span=.7, degree=1) anova(mod.lo.inc, mod.lo) anova(mod.lo.ed, mod.lo) # smoothing splines plot(income, prestige) inc.100 <- seq(min(income), max(income), len=100) pres <- predict(mod.lo.inc, data.frame(income=inc.100)) lines(inc.100, pres, lty=2, lwd=2) mod.lo.inc lines(smooth.spline(income, prestige, df=3.85), lwd=2) # additive regression models library(mgcv) mod.gam <- gam(prestige ~ s(income) + s(education)) mod.gam fit.prestige <- matrix(predict(mod.gam, newdata), 25, 25) persp(inc, ed, fit.prestige, theta=45, phi=30, ticktype='detailed', xlab='Income', ylab='Education', zlab='Prestige', expand=2/3, shade=0.5) par(mfrow=c(1,2)) plot(mod.gam) fit.prestige <- matrix(predict(mod.gam, newdata), 25, 25) persp(inc, ed, fit.prestige, theta=45, phi=30, ticktype='detailed', xlab='Income', ylab='Education', zlab='Prestige', expand=2/3, shade=0.5) # generalized nonparametric regression detach(Prestige) remove(list=objects()) # clean up everything data(Mroz) attach(Mroz) k5f <- factor(k5) k618f <- factor(k618) mod.1 <- gam(lfp ~ s(age) + s(inc) + k5f + k618f + wc + hc, family=binomial) summary(mod.1) anova(mod.1) par(mfrow=c(1,2)) plot(mod.1) mod.2 <- gam(lfp ~ age + s(inc) + k5f + k618f + wc + hc, family=binomial) anova(mod.2, mod.1, test='Chisq') mod.3 <- gam(lfp ~ s(age) + inc + k5f + k618f + wc + hc, family=binomial) anova(mod.3, mod.1, test='Chisq') mod.4 <- gam(lfp ~ s(inc) + k5f + k618f + wc + hc, # omits age family=binomial) anova(mod.4, mod.1, test='Chisq')