##--------------------------------------------------------------##
##                  Script for Lecture 4:                       ##
##               Linear Models in R, Part II                    ##
##                        John Fox                              ##
##   Introduction to the R Statistical Computing Environment    ##
##                          ICPSR                               ##
##                          2022                                ##
##--------------------------------------------------------------##

library("car")

Prestige.2 <- na.omit(Prestige) # continuing from last lecture
Prestige.2$type <- factor(Prestige.2$type, levels=c("bc", "wc", "prof"))
prestige.mod.2 <- lm(prestige ~ log2(income) + education + type, 
                     data=Prestige.2)
S(prestige.mod.2)


# Hypothesis tests

    # ANOVA tables

anova(prestige.mod.2) # sequential (Type-I) tests -- not generally useful
Anova(prestige.mod.2) # partial (Type-II) tests

prestige.mod.3 <- lm(prestige ~ (log2(income) + education)*type, data=Prestige.2)
Anova(prestige.mod.3)

    # direct computation of LR F tests for nested models

anova(prestige.mod.2, prestige.mod.3) # both sets of interactions

brief((mod.duncan.1 <- lm(prestige ~ income + education, data=Duncan)))
brief(mod.duncan.2 <- lm(prestige ~ I(income + education), data=Duncan)) # equal slopes
anova(mod.duncan.1, mod.duncan.2) # test of equal slopes

    # Wald tests of linear hypotheses

brief(prestige.mod.3)
matchCoefs(prestige.mod.3, ":")
linearHypothesis(prestige.mod.3, matchCoefs(prestige.mod.3, ":")) # no interactions

linearHypothesis(mod.duncan.1, "income = education") # test of equal slopes


    # Delta method for confidence intervals of nonlinear functions of coefficients

deltaMethod(mod.duncan.1, "income/education")


# Visualizing fitted models: Effect displays

library("effects")
plot(predictorEffects(prestige.mod.3, c("income", "education")))

# Some regression (model) diagnostics

    # Unusual data diagnostics

        # studentized residuals, hatvalues, and Cook's distances

brief(mod.duncan.1)
influencePlot(mod.duncan.1)
outlierTest(mod.duncan.1)

        # added-variable ("partial regression") plots

avPlots(mod.duncan.1, id=list(n=3, method="mahal"))

whichNames(c("minister", "conductor"), Duncan)
mod.duncan.3 <- update(mod.duncan.1, subset = - c(6, 16))
compareCoefs(mod.duncan.1, mod.duncan.3)

    # Nonlinearity diagnostics

        # component + residual ("partial residual") plots

brief(prestige.mod.4 <- lm(prestige ~ education + income  + women, 
                           data=Prestige))
crPlots(prestige.mod.4)

brief(prestige.mod.5 <- lm(prestige ~ education + log2(income)  
                           + poly(women, 2, raw=TRUE), 
                           data=Prestige))
crPlots(prestige.mod.5)

        # adding partial residuals to effect plots

brief(prestige.mod.2)
plot(Effect(c("income", "type"), prestige.mod.2, residuals=TRUE), 
     partial.residual=list(span=1))
plot(Effect(c("education", "type"), prestige.mod.2, residuals=TRUE), 
     partial.residual=list(span=1))

brief(prestige.mod.6 <- lm(prestige ~ type*(income + education), data=Prestige.2))
Anova(prestige.mod.6)
plot(allEffects(prestige.mod.6, partial.residuals=TRUE), span=1)

    # Non-normality diagnostics

densityPlot(rstudent(prestige.mod.2))

qqPlot(prestige.mod.2)


    # Nonconstant variance diagnostics

        # (studentized) residuals vs. fitted values
plot(fitted(prestige.mod.2), rstudent(prestige.mod.2))
abline(h=0)

spreadLevelPlot(prestige.mod.2)

ncvTest(prestige.mod.2)
ncvTest(prestige.mod.2, ~ education + log2(income) + women)