##-------------------------------------------------## ## Introduction to Structural Equation Modeling ## ## with the sem Package in R ## ## John Fox ## ## ISM/Tokyo ## ## November 2008 ## ##-------------------------------------------------## library(sem) # Duncan, Haller, and Portes's nonrecursive peer-influences model # FIML estimates from the sem function R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 # the following specifications are all equivalent: model.DHP.1 <- specifyModel() RIQ -> ROccAsp, gamma51, NA RSES -> ROccAsp, gamma52, NA FSES -> FOccAsp, gamma63, NA FIQ -> FOccAsp, gamma64, NA FOccAsp -> ROccAsp, beta56, NA ROccAsp -> FOccAsp, beta65, NA ROccAsp <-> ROccAsp, sigma77, NA FOccAsp <-> FOccAsp, sigma88, NA ROccAsp <-> FOccAsp, sigma78, NA model.DHP.1 <- specifyModel() RIQ -> ROccAsp, gamma51 RSES -> ROccAsp, gamma52 FSES -> FOccAsp, gamma63 FIQ -> FOccAsp, gamma64 FOccAsp -> ROccAsp, beta56 ROccAsp -> FOccAsp, beta65 ROccAsp <-> FOccAsp, sigma78 model.DHP.1 <- specifyEquations() ROccAsp = gamma51*RIQ + gamma52*RSES + beta56*FOccAsp FOccAsp = gamma64*FIQ + gamma63*FSES + beta65*ROccAsp C(ROccAsp, FOccAsp) = sigma78 sem.DHP.1 <- sem(model.DHP.1, S=R.DHP, N=329, fixed.x=c("RIQ", "RSES", "FSES", "FIQ")) summary(sem.DHP.1) # Blau and Duncan's recursive basic-stratification model R.BD <- read.moments(diag=FALSE, names=c("FatherEd", "FatherOcc", "Education", "FirstJob", "Job1962")) .516 .453 .438 .332 .417 .538 .322 .405 .596 .541 R.BD model.BD <- specifyEquations() Education = gamma31*FatherEd + gamma32*FatherOcc FirstJob = gamma42*FatherOcc + beta43*Education Job1962 = gamma52*FatherOcc + beta53*Education + beta54*FirstJob model.BD sem.BD <- sem(model.BD, R.BD, 20700, fixed.x=c("FatherEd", "FatherOcc")) summary(sem.BD) # note: the OLS estimates # Duncan, Haller, and Portes's block-recursive peer-influences model model.DHP.2 <- specifyModel() RIQ -> ROccAsp, gamma51 RSES -> ROccAsp, gamma52 FSES -> ROccAsp, gamma53 RSES -> FOccAsp, gamma62 FSES -> FOccAsp, gamma63 FIQ -> FOccAsp, gamma64 FOccAsp -> ROccAsp, beta56 ROccAsp -> FOccAsp, beta65 RIQ -> REdAsp, gamma71 RSES -> REdAsp, gamma72 FSES -> REdAsp, gamma73 RSES -> FEdAsp, gamma82 FSES -> FEdAsp, gamma83 FIQ -> FEdAsp, gamma84 ROccAsp -> REdAsp, beta75 FOccAsp -> FEdAsp, beta86 FEdAsp -> REdAsp, beta78 REdAsp -> FEdAsp, beta87 ROccAsp <-> FOccAsp, sigma910 REdAsp <-> FEdAsp, sigma1112 model.DHP.2 sem.DHP.2 <- sem(model.DHP.2, R.DHP, 329, fixed.x=c('RIQ', 'RSES', 'FSES', 'FIQ')) summary(sem.DHP.2) # Confirmatory Factor Analyis of Holzinger's data on 9 psychological tests R.Holzinger <- read.moments(diag=FALSE, names=c("Word.meaning", "Sentence.completion", "Odd.words", "Mixed.arithmetic", "Remainders", "Missing.numbers", "Gloves", "Boots", "Hatchets")) .75 .78 .72 .44 .52 .47 .45 .53 .48 .82 .51 .58 .54 .82 .74 .21 .23 .28 .33 .37 .35 .30 .32 .37 .33 .36 .38 .45 .31 .30 .37 .31 .36 .38 .52 .67 R.Holzinger model.Holzinger.1 model.Holzinger.1 <- cfa(reference.indicators=FALSE, covs=NULL) # model assuming uncorrelated factors Verbal: Word.meaning, Sentence.completion, Odd.words Arithmetic: Mixed.arithmetic, Remainders, Missing.numbers Spatial: Gloves, Boots, Hatchets sem.Holzinger.1 <- sem(model.Holzinger.1, S=R.Holzinger, N=696) summary(sem.Holzinger.1) model.Holzinger.2 <- cfa(reference.indicators=FALSE) # model allowing correlated factors Verbal: Word.meaning, Sentence.completion, Odd.words Arithmetic: Mixed.arithmetic, Remainders, Missing.numbers Spatial: Gloves, Boots, Hatchets sem.Holzinger.2 <- sem(model.Holzinger.2, S=R.Holzinger, N=696) summary(sem.Holzinger.2) anova(sem.Holzinger.1, sem.Holzinger.2) # Bollen's industrialization and democracy model (using data set) head(Bollen) ?Bollen model.bollen <- specifyEquations() y1 = 1*Demo60 y2 = lam2*Demo60 y3 = lam3*Demo60 y4 = lam4*Demo60 y5 = 1*Demo65 y6 = lam2*Demo65 y7 = lam3*Demo65 y8 = lam4*Demo65 x1 = 1*Indust x2 = lam6*Indust x3 = lam7*Indust c(y1, y5) = theta15 c(y2, y4) = theta24 c(y2, y6) = theta26 c(y3, y7) = theta37 c(y4, y8) = theta48 c(y6, y8) = theta68 Demo60 = gamma11*Indust Demo65 = gamma21*Indust + beta21*Demo60 v(Indust) = phi pathDiagram(model.bollen, obs.variables=c(paste("y", 1:8, sep=""), paste("x", 1:3, sep="")), file="Bollen") sem.bollen <- sem(model.bollen, data=Bollen) summary(sem.bollen) summary(sem.bollen, robust=TRUE) # robust standard errors and tests round(sqrt(diag(vcov(sem.bollen)))/ sqrt(diag(vcov(sem.bollen, robust=TRUE))), 2) # compare # Canadian National Election Study CFA with ordinal data and bootstrap standard errors head(CNES) ?CNES model.cnes <- cfa(reference.indicators=FALSE) F: MBSA2, MBSA7, MBSA8, MBSA9 model.cnes library(polycor) hcor <- function(data) hetcor(data, std.err=FALSE)$correlations (R.cnes <- hcor(CNES)) summary(sem.cnes <- sem(model.cnes, R.cnes, N=1529)) set.seed(12345) # for reproducibility system.time(boot.cnes <- bootSem(sem.cnes, R=100, Cov=hcor, data=CNES)) (boot.summary <- summary(boot.cnes, type="norm")) round(sqrt(diag(vcov(sem.cnes)))/boot.summary$table[, "Std.Error"], 2) # compare # Mental tests CFA with missing data head(Tests) nrow(Tests) ?Tests mod.cfa.tests <- cfa(raw=TRUE) verbal: x1, x2, x3 math: y1, y2, y3 mod.cfa.tests # FIML estimator: cfa.tests <- sem(mod.cfa.tests, data=Tests, na.action=na.pass, objective=objectiveFIML, fixed.x="Intercept") summary(cfa.tests, saturated=TRUE) # complete-case analysis, ML estimator: cfa.tests.cc <- sem(mod.cfa.tests, data=Tests, raw=TRUE, fixed.x="Intercept") library(car) # for compareCoefs() compareCoefs(cfa.tests, cfa.tests.cc) # multiple imputation of missing data imps <- miSem(mod.cfa.tests, data=Tests, fixed.x="Intercept", raw=TRUE, seed=12345) summary(imps, digits=2) # Holzinger-Swineford multi-group CFA library(MBESS) data(HS.data) head(HS.data) ?HS.data mod.hs <- cfa() spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) class(mod.mg) sem.mg <- sem(mod.mg, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg) mod.mg.eq <- multigroupModel(mod.hs, groups=c("Female", "Male"), allEqual=TRUE) sem.mg.eq <- sem(mod.mg.eq, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) sem.mg.eq anova(sem.mg.eq, sem.mg) BIC(sem.mg) BIC(sem.mg.eq) modIndices(sem.mg.eq)