##--------------------------------------------------------------## ## Script for Lecture 3: ## ## Linear Models in R, Part I ## ## John Fox ## ## Introduction to the R Statistical Computing Environment ## ## ICPSR ## ## 2022 ## ##--------------------------------------------------------------## # multiple regression (Prestige data) library("car") names(Prestige) # from carData package some(Prestige) # 10 randomly sampling observations brief(Prestige) View(Prestige) ?Prestige #help on data set prestige.mod.1 <- lm(prestige ~ education + log2(income) + women, data=Prestige) summary(prestige.mod.1) S(prestige.mod.1) brief(prestige.mod.1) # dummy regression Prestige$type # a factor class(Prestige$type) str(Prestige$type) # structure: also see RStudio Environment tab Prestige.2 <- na.omit(Prestige) # filter out missing data nrow(Prestige) nrow(Prestige.2) levels(Prestige.2$type) Prestige.2$type <- with(Prestige.2, factor(type, levels=c("bc", "wc", "prof"))) # reorder levels Prestige.2$type # generating contrasts from factors (or character or logical variables) getOption("contrasts") contrasts(Prestige.2$type) some(model.matrix(~ type, data=Prestige.2), 20) contrasts(Prestige.2$type) <- contr.treatment(levels(Prestige.2$type), base=2) # changing baseline level contrasts(Prestige.2$type) contrasts(Prestige.2$type) <- "contr.helmert" # Helmert contrasts contrasts(Prestige.2$type) contrasts(Prestige.2$type) <- "contr.sum" # "deviation" contrasts contrasts(Prestige.2$type) contrasts(Prestige.2$type) <- NULL # back to default Prestige.2$type.ord <- ordered(Prestige.2$type, levels=c("bc", "wc", "prof")) # ordered factor Prestige.2$type.ord round(contrasts(Prestige.2$type.ord), 3) # orthogonal polynomial contrasts crossprod(contrasts(Prestige.2$type.ord)) # floating-point arithmetic # is not exact! round(crossprod(contrasts(Prestige.2$type.ord)), 10) zapsmall(crossprod(contrasts(Prestige.2$type.ord))) # handling of character and logical variables Prestige$prof <- Prestige$type == "prof" Prestige$typec <- as.character(Prestige$type) brief(Prestige) contrasts(Prestige$prof) contrasts(Prestige$typec) # NB: doesn't work, but: some(model.matrix(~typec, data=Prestige)) prestige.mod.2 <- lm(prestige ~ log2(income) + education + type, data=Prestige.2) S(prestige.mod.2) scatter3d(prestige ~ log2(income) + education | type, parallel=TRUE, data=Prestige) prestige.mod.3 <- update(prestige.mod.2, . ~ . + log2(income):type + education:type) # adding interactions S(prestige.mod.3) scatter3d(prestige ~ log2(income) + education | type, parallel=FALSE, data=Prestige) # equivalent specifications: brief(lm(prestige ~ log2(income)*type + education*type, data=Prestige.2)) brief(lm(prestige ~ (log2(income) + education)*type, data=Prestige.2)) # more on lm() (time permitting) args(lm) brief(Davis) ?Davis brief(lm(weight ~ repwt, data=Davis, subset=sex == "F")) # observation selection # (women only) brief(lm(weight ~ repwt, data=Davis, subset=1:100)) # first 100 cases brief(lm(prestige ~ income + education, data=Duncan, subset=-c(6, 16))) # omit cases 6 & 16 brief(lm(conformity ~ partner.status*fcategory, # specifying contrasts contrasts=list(partner.status=contr.sum, fcategory=contr.poly), data=Moore)) brief(lm(100*conformity/40 ~ partner.status*fcategory, data=Moore)) # note computation of y brief(lm(prestige ~ I(income + education), data=Duncan)) # "protecting" expresssion # on RHS of the model