##-----------------------------------------------------------------## ## Script for Structural-Equation Models ## ## John Fox ## ## IQS Barcelona ## ## January 2016 ## ##-----------------------------------------------------------------## library(sem) # Duncan, Haller, and Portes's nonrecursive peer-influences model # FIML estimates from the sem function R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp")) .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 # using the text argument is more convenient in RStudio: R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"), text=" .6247 .3269 .3669 .4216 .3275 .6404 .2137 .2742 .1124 .0839 .4105 .4043 .2903 .2598 .1839 .3240 .4047 .3054 .2786 .0489 .2220 .2930 .2407 .4105 .3607 .0186 .1861 .2707 .2995 .2863 .5191 .5007 .0782 .3355 .2302 .2950 .0760 .0702 .2784 .1988 .1147 .1021 .0931 -.0438 .2087 ") # the following specifications are all equivalent: model.DHP.1 <- specifyModel() RIQ -> ROccAsp, gamma51, NA RSES -> ROccAsp, gamma52, NA FSES -> FOccAsp, gamma63, NA FIQ -> FOccAsp, gamma64, NA FOccAsp -> ROccAsp, beta56, NA ROccAsp -> FOccAsp, beta65, NA ROccAsp <-> ROccAsp, sigma77, NA FOccAsp <-> FOccAsp, sigma88, NA ROccAsp <-> FOccAsp, sigma78, NA model.DHP.1 <- specifyModel(text=" RIQ -> ROccAsp, gamma51 RSES -> ROccAsp, gamma52 FSES -> FOccAsp, gamma63 FIQ -> FOccAsp, gamma64 FOccAsp -> ROccAsp, beta56 ROccAsp -> FOccAsp, beta65 ROccAsp <-> FOccAsp, sigma78 ") model.DHP.1 <- specifyEquations(text=" ROccAsp = gamma51*RIQ + gamma52*RSES + beta56*FOccAsp FOccAsp = gamma64*FIQ + gamma63*FSES + beta65*ROccAsp C(ROccAsp, FOccAsp) = sigma78 ") model.DHP.1 pathDiagram(model.DHP.1, obs.variables=c("ROccAsp", "RIQ", "RSES", "FOccAsp", "FIQ", "FSES")) sem.DHP.1 <- sem(model.DHP.1, S=R.DHP, N=329, fixed.x=c("RIQ", "RSES", "FSES", "FIQ")) summary(sem.DHP.1) # A latent-variable version of the Duncan, Haller, and Portes model model.DHP.2 <- specifyEquations(covs="RGenAsp, FGenAsp", text=" RGenAsp = gam11*RParAsp + gam12*RIQ + gam13*RSES + gam14*FSES + beta12*FGenAsp FGenAsp = gam23*RSES + gam24*FSES + gam25*FIQ + gam26*FParAsp + beta21*RGenAsp ROccAsp = 1*RGenAsp REdAsp = lam21(1)*RGenAsp # to illustrate setting start values FOccAsp = 1*FGenAsp FEdAsp = lam42(1)*FGenAsp ") model.DHP.2 sem.DHP.2 <- sem(model.DHP.2, R.DHP, 329, fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp')) summary(sem.DHP.2) pathDiagram(model.DHP.2, obs.variables=c("RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp", "ROccAsp", "REdAsp", "FOccAsp", "FEdAsp"), style="traditional", node.colors=c("pink", "lightblue", "lightgreen"), min.rank="RIQ, RSES, RParAsp, FParAsp, FSES, FIQ", max.rank="ROccAsp, REdAsp, FEdAsp, FOccAsp", same.rank="RGenAsp, FGenAsp", var.labels=c(RParAsp="Respondent Parental Aspiration", RIQ="Respondent IQ", RSES="Respondent SES", FSES="Friend SES", FIQ="Friend IQ", FParAsp="Friend Parental Aspiration", ROccAsp="Respondent Occupational Aspiration", REdAsp="Respondent Educational Aspiration", RGenAsp="Respondent General Aspiration", FOccAsp="Friend Occupational Aspiration", FEdAsp="Friend Educational Aspiration", FGenAsp="Friend General Aspiration", math(c(RGenAsp.error="xi_{1}", FGenAsp.error="xi_{2}", ROccAsp.error="epsilon_{1}", REdAsp.error="epsilon_{2}", FOccAsp.error="epsilon_{3}", FEdAsp.error="epsilon_{4}"))), par.labels=math(c(gam11="gamma_{11}", gam12="gamma_{12}", gam13="gamma_{13}", gam14="gamma_{14}", gam23="gamma_{23}", gam24="gamma_{24}", gam25="gamma_{25}", gam26="gamma_{26}", beta12="beta_{12}", beta21="beta_{21}", lam21="lambda_{21}", lam42="lambda_{42}", ps11="psi_{11}", ps22="psi_{22}", ps12="psi_{12}", theta1="theta_{1}", theta2="theta_{2}", theta3="theta_{3}", theta4="theta_{4}"))) # Further examples as time and interest permit: # Confirmatory factor analyis of Holzinger's data on 9 psychological tests R.Holzinger <- readMoments(diag=FALSE, names=c("Word.meaning", "Sentence.completion", "Odd.words", "Mixed.arithmetic", "Remainders", "Missing.numbers", "Gloves", "Boots", "Hatchets"), text=" .75 .78 .72 .44 .52 .47 .45 .53 .48 .82 .51 .58 .54 .82 .74 .21 .23 .28 .33 .37 .35 .30 .32 .37 .33 .36 .38 .45 .31 .30 .37 .31 .36 .38 .52 .67 ") R.Holzinger model.Holzinger.1 <- cfa(reference.indicators=FALSE, covs=NULL, # model assuming uncorrelated factors text=" Verbal: Word.meaning, Sentence.completion, Odd.words Arithmetic: Mixed.arithmetic, Remainders, Missing.numbers Spatial: Gloves, Boots, Hatchets ") model.Holzinger.1 pathDiagram(model.Holzinger.1, obs.variables=c("Word.meaning", "Sentence.completion", "Odd.words", "Mixed.arithmetic", "Remainders", "Missing.numbers", "Gloves", "Boots", "Hatchets")) sem.Holzinger.1 <- sem(model.Holzinger.1, S=R.Holzinger, N=696) summary(sem.Holzinger.1) model.Holzinger.2 <- cfa(reference.indicators=FALSE, # model allowing correlated factors text=" Verbal: Word.meaning, Sentence.completion, Odd.words Arithmetic: Mixed.arithmetic, Remainders, Missing.numbers Spatial: Gloves, Boots, Hatchets ") sem.Holzinger.2 <- sem(model.Holzinger.2, S=R.Holzinger, N=696) summary(sem.Holzinger.2) anova(sem.Holzinger.1, sem.Holzinger.2) # LR test for uncorrelated factors # Bollen's industrialization and democracy model (using data set) head(Bollen) ?Bollen model.bollen <- specifyEquations(text=" y1 = 1*Demo60 y2 = lam2*Demo60 y3 = lam3*Demo60 y4 = lam4*Demo60 y5 = 1*Demo65 y6 = lam2*Demo65 y7 = lam3*Demo65 y8 = lam4*Demo65 x1 = 1*Indust x2 = lam6*Indust x3 = lam7*Indust c(y1, y5) = theta15 c(y2, y4) = theta24 c(y2, y6) = theta26 c(y3, y7) = theta37 c(y4, y8) = theta48 c(y6, y8) = theta68 Demo60 = gamma11*Indust Demo65 = gamma21*Indust + beta21*Demo60 v(Indust) = phi ") sem.bollen <- sem(model.bollen, data=Bollen) summary(sem.bollen) pathDiagram(sem.bollen, edge.labels="values") summary(sem.bollen, robust=TRUE) # robust standard errors and tests round(sqrt(diag(vcov(sem.bollen)))/ sqrt(diag(vcov(sem.bollen, robust=TRUE))), 2) # compare # Canadian National Election Study CFA # with ordinal data and bootstrap standard errors head(CNES) ?CNES model.cnes <- cfa(reference.indicators=FALSE, text=" F: MBSA2, MBSA7, MBSA8, MBSA9 ") model.cnes pathDiagram(model.cnes, obs.variables=c("MBSA2", "MBSA7", "MBSA8", "MBSA9")) library(polycor) hcor <- function(data) hetcor(data, std.err=FALSE)$correlations (R.cnes <- hcor(CNES)) summary(sem.cnes <- sem(model.cnes, R.cnes, N=1529)) set.seed(12345) # for reproducibility system.time(boot.cnes <- bootSem(sem.cnes, R=100, Cov=hcor, data=CNES)) (boot.summary <- summary(boot.cnes, type="norm")) round(sqrt(diag(vcov(sem.cnes))) /boot.summary$table[, "Std.Error"], 2) # compare # Mental tests CFA with missing data head(Tests) nrow(Tests) ?Tests mod.cfa.tests <- cfa(raw=TRUE, text=" verbal: x1, x2, x3 math: y1, y2, y3 ") mod.cfa.tests # FIML estimator: cfa.tests <- sem(mod.cfa.tests, data=Tests, na.action=na.pass, objective=objectiveFIML, fixed.x="Intercept") summary(cfa.tests, saturated=TRUE) # complete-case analysis, ML estimator: cfa.tests.cc <- sem(mod.cfa.tests, data=Tests, raw=TRUE, fixed.x="Intercept") library(car) # for compareCoefs() compareCoefs(cfa.tests, cfa.tests.cc) # multiple imputation of missing data (requires patience) imps <- miSem(mod.cfa.tests, data=Tests, fixed.x="Intercept", raw=TRUE, seed=12345) summary(imps, digits=2) # Holzinger-Swineford multi-group CFA library(MBESS) data(HS.data) head(HS.data) ?HS.data mod.hs <- cfa(text=" spatial: visual, cubes, paper, flags verbal: general, paragrap, sentence, wordc, wordm memory: wordr, numberr, figurer, object, numberf, figurew math: deduct, numeric, problemr, series, arithmet ") mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) class(mod.mg) sem.mg <- sem(mod.mg, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg) # with cross-group equality constraints: mod.mg.eq <- multigroupModel(mod.hs, groups=c("Female", "Male"), allEqual=TRUE) sem.mg.eq <- sem(mod.mg.eq, data=HS.data, group="Gender", formula = ~ visual + cubes + paper + flags + general + paragrap + sentence + wordc + wordm + wordr + numberr + figurer + object + numberf + figurew + deduct + numeric + problemr + series + arithmet ) summary(sem.mg.eq) anova(sem.mg, sem.mg.eq) # test equality constraints BIC(sem.mg) BIC(sem.mg.eq) modIndices(sem.mg.eq)