##--------------------------------------------------------------## ## Script for Lecture 2: ## ## Workflow in R ## ## John Fox ## ## Introduction to the R Statistical Computing Environment ## ## ICPSR ## ## 2021 ## ##--------------------------------------------------------------## # An Illustrative Data Analysis: # Duncan's Occupational Prestige Regression Revisited library("car") # load car package (for functions) Duncan <- read.table("data/Duncan.txt", header=TRUE) head(Duncan, n=10) # first 10 cases brief(Duncan) # abbreviated output dim(Duncan) # rows and columns View(Duncan) # in the RStudio data viewer summary(Duncan) # invoking the summary() generic function Duncan$type <- factor(Duncan$type, levels=c("bc", "wc", "prof")) summary(Duncan) # Examining the Data with(Duncan, hist(prestige)) # with() gives hist() access to the data scatterplotMatrix( ~ prestige + education + income, # uses a 'one-sided' formula id=list(n=3), data=Duncan) # Duncan's regression (duncan.model <- lm(prestige ~ education + income, data=Duncan)) # note data argument, two-sided formula, return "lm" object summary(duncan.model) # more detailed report # some regression diagnostics # distribution of the (studentized) residuals densityPlot(rstudent(duncan.model)) # uses adaptive kernel estimator qqPlot(duncan.model, id=list(n=3)) # vs. t(n - k - 2) = t(41) outlierTest(duncan.model) # influence diagnostics influencePlot(duncan.model, id=list(n=3)) influenceIndexPlot(duncan.model, id=list(n=3)) avPlots(duncan.model, id=list(n=3, method="mahal")) # nonlinearity diagnostic crPlots(duncan.model) # nonconstant-spread diagnostics spreadLevelPlot(duncan.model, smooth=list(span=1)) ncvTest(duncan.model) ncvTest(duncan.model, var.formula= ~ income + education) # refit without ministers and conductors whichNames(c("minister", "conductor"), Duncan) duncan.model.2 <- update(duncan.model, subset=-c(6, 16)) summary(duncan.model.2) compareCoefs(duncan.model, duncan.model.2)