##------------------------------------------------------------------------## ## Script for Chapter 5, An R and S-PLUS Companion to Applied Regression ## ## John Fox ## ## Sage Publications, 2002 ## ##------------------------------------------------------------------------## # preliminaries options(width=65) options(digits=5) # logit, probit, and c-log-log links eta<-seq(-4, 4, length=100) mu.logit<-1/(1+exp(-eta)) mu.probit<-pnorm(eta, 0, pi/sqrt(3)) mu.cloglog<-1-exp(-exp(eta)) plot(c(eta,eta,eta), c(mu.logit,mu.logit,mu.cloglog), xlab=expression(eta), ylab=expression(mu == L^{-1}*{(eta)}), lwd=2, type="n", las=1) ## for S-PLUS: ## plot(c(eta,eta,eta), c(mu.logit,mu.logit,mu.cloglog), 'eta', ## ylab='mu', lwd=2, type="n", las=1) lines(spline(eta, mu.logit), lwd=3) lines(spline(eta, mu.probit), lwd=3, lty=3) lines(spline(eta, mu.cloglog), lwd=3, lty=4) legend(locator(1), lty=c(1,3,4), lwd=3, legend=c("logit","probit","complementary log-log")) #logistic regression library(car) data(Mroz) Mroz[sort(sample(753,10)),] attach(Mroz) mod.mroz <- glm(lfp~k5+k618+age+wc+hc+lwg+inc, fam=binomial) summary(mod.mroz) anova(update(mod.mroz, . ~ . - k5), mod.mroz, test="Chisq") Anova(mod.mroz) # with binomial response closeness <- factor(rep(c('one.sided', 'close'), c(3,3)), levels=c('one.sided', 'close')) preference <- ordered(rep(c('weak', 'medium', 'strong'), 2), levels=c('weak', 'medium', 'strong')) voted <- c(91, 121, 64, 214, 284, 201) did.not.vote <- c(39, 49, 24, 87, 76, 25) logit.turnout <- log(voted/did.not.vote) data.frame(closeness, preference, voted, did.not.vote, logit=logit.turnout) par(mar=c(5.1, 4.1, 4.1, 4.1)) plot(rep(1:3, 2), logit.turnout, type='n', axes=F, xlab='Intensity of Preference', ylab='Logit(Voted/Did Not Vote)') axis(1, at=1:3, labels=c('Weak', 'Medium', 'Strong')) axis(2) prob.axis(side='right', at=seq(.7, .85, by=.05), axis.title='Proportion(Voted)') box() points(1:3, logit.turnout[1:3], pch=1, type='b', lty=1, lwd=3, cex=2) points(1:3, logit.turnout[4:6], pch=16, type='b', lty=2, lwd=3, cex=2) text(locator(2), c('Close', 'One-Sided')) options(contrasts=c('contr.sum', 'contr.poly')) mod.campbell <- glm(cbind(voted, did.not.vote) ~ closeness*preference, family=binomial) summary(mod.campbell) Anova(mod.campbell) # Polytomous Data data(Womenlf) Womenlf[sort(sample(1:263, 10)),] attach(Womenlf) working <- recode(partic, " 'not.work' = 'no'; else = 'yes' ") fulltime <- recode (partic, " 'fulltime' = 'yes'; 'parttime' = 'no'; 'not.work' = NA") working fulltime options(contrasts=c('contr.treatment', 'contr.poly')) mod.working <- glm(working ~ hincome + children + region, family=binomial) summary(mod.working) mod.fulltime <- glm(fulltime ~ hincome + children + region, family=binomial) summary(mod.fulltime) Anova(mod.working) Anova(mod.fulltime) mod.working.1 <- update(mod.working, . ~ . - region) mod.fulltime.1 <- update(mod.fulltime, . ~ . - region) predictors <- expand.grid(hincome=1:45, children=c('absent', 'present')) predictors p.work <- predict(mod.working.1, predictors, type='response') p.fulltime <- predict(mod.fulltime.1, predictors, type='response') p.full <- p.work * p.fulltime p.part <- p.work * (1 - p.fulltime) p.not <- 1 - p.work par(mfrow=c(1,2)) plot(c(1,45), c(0,1), type='n', xlab="Husband's Income", ylab='Fitted Probability', main="Children Absent") lines(1:45, p.not[1:45], lty=1, lwd=3) lines(1:45, p.part[1:45], lty=2, lwd=3) lines(1:45, p.full[1:45], lty=3, lwd=3) legend(locator(1), lty=1:3, lwd=3, legend=c('not working', 'part-time', 'full-time')) plot(c(1,45), c(0,1), type='n', xlab="Husband's Income", ylab='Fitted Probability', main="Children Present") lines(1:45, p.not[46:90], lty=1, lwd=3) lines(1:45, p.part[46:90], lty=2, lwd=3) lines(1:45, p.full[46:90], lty=3, lwd=3) # multinomial logistic regression participation <- ordered(partic, levels=c('not.work', 'parttime', 'fulltime')) library(nnet) library(MASS) mod.multinom <- multinom(participation ~ hincome + children) summary(mod.multinom, cor=F, Wald=T) p.fit <- predict(mod.multinom, predictors, type='probs') data.frame(predictors, p.fit)[1:5,] plot(c(1,45), c(0,1), type='n', xlab="Husband's Income", ylab='Fitted Probability', main="Children Absent") lines(1:45, p.fit[1:45, 'not.work'], lty=1, lwd=3) lines(1:45, p.fit[1:45, 'parttime'], lty=2, lwd=3) lines(1:45, p.fit[1:45, 'fulltime'], lty=3, lwd=3) legend(locator(1), lty=1:3, lwd=3, legend=c('not working', 'part-time', 'full-time')) plot(c(1,45), c(0,1), type='n', xlab="Husband's Income", ylab='Fitted Probability', main="Children Present") lines(1:45, p.fit[46:90, 'not.work'], lty=1, lwd=3) lines(1:45, p.fit[46:90, 'parttime'], lty=2, lwd=3) lines(1:45, p.fit[46:90, 'fulltime'], lty=3, lwd=3) # proportional odds model mod.polr <- polr(participation ~ hincome + children) summary(mod.polr) p.fit <- predict(mod.polr, predictors, type='probs') plot(c(1,45), c(0,1), type='n', xlab="Husband's Income", ylab='Fitted Probability', main="Children Absent") lines(1:45, p.fit[1:45, 'not.work'], lty=1, lwd=3) lines(1:45, p.fit[1:45, 'parttime'], lty=2, lwd=3) lines(1:45, p.fit[1:45, 'fulltime'], lty=3, lwd=3) legend(locator(1), lty=1:3, lwd=3, legend=c('not working', 'part-time', 'full-time')) plot(c(1,45), c(0,1), type='n', xlab="Husband's Income", ylab='Fitted Probability', main="Children Present") lines(1:45, p.fit[46:90, 'not.work'], lty=1, lwd=3) lines(1:45, p.fit[46:90, 'parttime'], lty=2, lwd=3) lines(1:45, p.fit[46:90, 'fulltime'], lty=3, lwd=3) 1 - pchisq(441.66 - 422.88, df = 6 - 4) # artificial plot x <- 1:100 logit.1 <- 5 -.2*x logit.2 <- 7 -.2*x logit.3 <- 13 -.2*x p.1 <- 1/(1 + exp(logit.1)) p.2 <- 1/(1 + exp(logit.2)) p.3 <- 1/(1 + exp(logit.3)) plot(c(1, 100), c(0,1), type='n', xlab=expression(eta), ylab='Probability', axes=F) ## S-PLUS: ## plot(c(1, 100), c(0,1), ## type='n', xlab='eta', ylab='Probability', ## axes=F) axis(2) box() lines(1:100, p.1, lty=1, lwd=3) lines(1:100, p.2, lty=2, lwd=3) lines(1:100, p.3, lty=3, lwd=3) legend(locator(1), lty=1:3, lwd=3, legend=c('Pr(y > 1)', 'Pr(y > 2)', 'Pr(y > 3)')) # Poisson models # Poisson regression data(Ornstein) Ornstein[sort(sample(248,10)),] attach(Ornstein) tab <- table(interlocks) tab x <- sort(unique(interlocks)) plot(x, tab, type='h', xlab='Number of Interlocks', ylab='Frequency') points(x, tab, pch=16) mod.ornstein <- glm(interlocks ~ assets + nation + sector, family=poisson) summary(mod.ornstein) Anova(mod.ornstein) # loglinear model counts <- c(91, 39, 121, 49, 64, 24, 214, 87, 284, 76, 201, 25) Campbell <- expand.grid(turnout=c('voted','did.not.vote'), preference=c('weak', 'medium', 'strong'), closeness=c('one.sided', 'close')) cbind(Campbell, counts) options(contrasts=c('contr.sum', 'contr.poly')) attach(Campbell) mod.loglin <- glm(counts ~ closeness*preference*turnout, family=poisson) summary(mod.loglin) Anova(mod.loglin) detach(Campbell) ## create Campbell table as array campbell <- array(counts, c(2,3,2)) dimnames <- list(c('voted', 'did.not.vote'), c('weak', 'medium', 'strong'), c('one.sided', 'close')) names(dimnames)<-c('turnout','preference','closeness') dimnames(campbell)<-dimnames class(campbell) <- "table" campbell Campbell <- as.data.frame(campbell) Campbell ## S-PLUS ## ravel.table <- function(table){ ## cbind(expand.grid(dimnames(table)), ## Freq=as.vector(table)) ## } ## Campbell <- ravel.table(campbell) ## Campbell mod.loglin <- glm(Freq ~ closeness*preference*turnout, family=poisson, data=Campbell) # gamma distributions x <- seq(0.1, 10, by=.02) d.5 <- dgamma(x, shape=.5) d1 <- dgamma(x, shape=1) d2 <- dgamma(x, shape=2) d5 <- dgamma(x, shape=5) plot(rep(x,4), c(d.5, d1, d2, d5), type='n', xlab="y", ylab="p(y)") lines(x, d.5, lty=1, lwd=3) lines(x, d1, lty=1, lwd=3) lines(x, d2, lty=1, lwd=3) lines(x, d5, lty=1, lwd=3) ## The following text command does not work in S-PLUS: text (locator(4), pos=4, labels=c(expression(psi == 0.5), expression(psi == 1),expression(psi == 2),expression(psi == 5))) # overdispersed Poisson mod.ornstein.q <- update(mod.ornstein, family=quasipoisson) summary(mod.ornstein.q) Anova(mod.ornstein.q, test="F") # S-PLUS (and R) alternative: mod.ornstein.q <- update(mod.ornstein, family=quasi(link=log, var='mu')) summary(mod.ornstein.q) Anova(mod.ornstein.q, test="F") # arguments to glm args(glm)