##-----------------------------------------------------------------##
##          Script for Structural-Equation Models                  ##
##                        John Fox                                 ##
##                     IQS Barcelona                               ##
##                     January  2016                               ##
##-----------------------------------------------------------------##

library(sem)

# Duncan, Haller, and Portes's nonrecursive peer-influences model

# FIML estimates from the sem function

R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", 
    "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"))
.6247     
.3269  .3669       
.4216  .3275  .6404
.2137  .2742  .1124  .0839
.4105  .4043  .2903  .2598  .1839
.3240  .4047  .3054  .2786  .0489  .2220
.2930  .2407  .4105  .3607  .0186  .1861  .2707
.2995  .2863  .5191  .5007  .0782  .3355  .2302  .2950
.0760  .0702  .2784  .1988  .1147  .1021  .0931 -.0438  .2087

    # using the text argument is more convenient in RStudio:

R.DHP <- readMoments(diag=FALSE, names=c("ROccAsp", "REdAsp", "FOccAsp", 
    "FEdAsp", "RParAsp", "RIQ", "RSES", "FSES", "FIQ", "FParAsp"),
    text="
    .6247     
    .3269  .3669       
    .4216  .3275  .6404
    .2137  .2742  .1124  .0839
    .4105  .4043  .2903  .2598  .1839
    .3240  .4047  .3054  .2786  .0489  .2220
    .2930  .2407  .4105  .3607  .0186  .1861  .2707
    .2995  .2863  .5191  .5007  .0782  .3355  .2302  .2950
    .0760  .0702  .2784  .1988  .1147  .1021  .0931 -.0438  .2087
    ")

# the following specifications are all equivalent:

model.DHP.1 <- specifyModel() 
RIQ     ->  ROccAsp, gamma51,  NA
RSES    ->  ROccAsp, gamma52,  NA
FSES    ->  FOccAsp, gamma63,  NA
FIQ     ->  FOccAsp, gamma64,  NA  
FOccAsp ->  ROccAsp, beta56,   NA
ROccAsp ->  FOccAsp, beta65,   NA
ROccAsp <-> ROccAsp, sigma77,  NA 
FOccAsp <-> FOccAsp, sigma88,  NA   
ROccAsp <-> FOccAsp, sigma78,  NA

model.DHP.1 <- specifyModel(text=" 
    RIQ     ->  ROccAsp, gamma51
    RSES    ->  ROccAsp, gamma52
    FSES    ->  FOccAsp, gamma63
    FIQ     ->  FOccAsp, gamma64
    FOccAsp ->  ROccAsp, beta56
    ROccAsp ->  FOccAsp, beta65
    ROccAsp <-> FOccAsp, sigma78
    ")

model.DHP.1 <- specifyEquations(text="
    ROccAsp = gamma51*RIQ + gamma52*RSES + beta56*FOccAsp
    FOccAsp = gamma64*FIQ + gamma63*FSES + beta65*ROccAsp
    C(ROccAsp, FOccAsp) = sigma78
    ")

model.DHP.1

pathDiagram(model.DHP.1, obs.variables=c("ROccAsp", "RIQ", "RSES", 
    "FOccAsp", "FIQ", "FSES"))

sem.DHP.1 <- sem(model.DHP.1, S=R.DHP, N=329,
    fixed.x=c("RIQ", "RSES", "FSES", "FIQ"))
summary(sem.DHP.1)

# A latent-variable version of the Duncan, Haller, and Portes model

model.DHP.2 <- specifyEquations(covs="RGenAsp, FGenAsp", text="
    RGenAsp = gam11*RParAsp + gam12*RIQ + gam13*RSES + gam14*FSES + beta12*FGenAsp
    FGenAsp = gam23*RSES + gam24*FSES + gam25*FIQ + gam26*FParAsp + beta21*RGenAsp
    ROccAsp = 1*RGenAsp
    REdAsp = lam21(1)*RGenAsp  # to illustrate setting start values
    FOccAsp = 1*FGenAsp
    FEdAsp = lam42(1)*FGenAsp
    ")

model.DHP.2

sem.DHP.2 <- sem(model.DHP.2, R.DHP, 329,
    fixed.x=c('RParAsp', 'RIQ', 'RSES', 'FSES', 'FIQ', 'FParAsp'))
summary(sem.DHP.2)

pathDiagram(model.DHP.2,
    obs.variables=c("RParAsp", "RIQ", "RSES", "FSES", "FIQ", 
        "FParAsp", "ROccAsp", "REdAsp", "FOccAsp", "FEdAsp"),
    style="traditional", 
    node.colors=c("pink", "lightblue", "lightgreen"),
    min.rank="RIQ, RSES, RParAsp, FParAsp, FSES, FIQ",
    max.rank="ROccAsp, REdAsp, FEdAsp, FOccAsp",
    same.rank="RGenAsp, FGenAsp",
    var.labels=c(RParAsp="Respondent Parental Aspiration", 
        RIQ="Respondent IQ",
        RSES="Respondent SES",
        FSES="Friend SES",
        FIQ="Friend IQ",
        FParAsp="Friend Parental Aspiration",
        ROccAsp="Respondent Occupational Aspiration",
        REdAsp="Respondent Educational Aspiration",
        RGenAsp="Respondent General Aspiration",
        FOccAsp="Friend Occupational Aspiration",
        FEdAsp="Friend Educational Aspiration",
        FGenAsp="Friend General Aspiration",
        math(c(RGenAsp.error="xi_{1}",
            FGenAsp.error="xi_{2}",
            ROccAsp.error="epsilon_{1}",
            REdAsp.error="epsilon_{2}",
            FOccAsp.error="epsilon_{3}",
            FEdAsp.error="epsilon_{4}"))),
    par.labels=math(c(gam11="gamma_{11}",
        gam12="gamma_{12}",
        gam13="gamma_{13}",
        gam14="gamma_{14}",
        gam23="gamma_{23}",
        gam24="gamma_{24}",
        gam25="gamma_{25}",
        gam26="gamma_{26}",
        beta12="beta_{12}",
        beta21="beta_{21}",
        lam21="lambda_{21}",
        lam42="lambda_{42}",
        ps11="psi_{11}",
        ps22="psi_{22}",
        ps12="psi_{12}",
        theta1="theta_{1}",
        theta2="theta_{2}",
        theta3="theta_{3}",
        theta4="theta_{4}")))

# Further examples as time and interest permit:

# Confirmatory factor analyis of Holzinger's data on 9 psychological tests

R.Holzinger <- readMoments(diag=FALSE, names=c("Word.meaning", 
    "Sentence.completion", "Odd.words", "Mixed.arithmetic", "Remainders", 
    "Missing.numbers", "Gloves", "Boots", "Hatchets"), text="                                           
    .75                                       
    .78   .72                                   
    .44   .52    .47                              
    .45   .53    .48    .82                         
    .51   .58    .54    .82    .74                    
    .21   .23    .28    .33    .37    .35               
    .30   .32    .37    .33    .36    .38    .45          
    .31   .30    .37    .31    .36    .38    .52    .67    
    ")

R.Holzinger

model.Holzinger.1 <- cfa(reference.indicators=FALSE, covs=NULL, 
    # model assuming uncorrelated factors
    text="
    Verbal: Word.meaning, Sentence.completion, Odd.words
    Arithmetic: Mixed.arithmetic, Remainders, Missing.numbers
    Spatial: Gloves, Boots, Hatchets
    ")

model.Holzinger.1
pathDiagram(model.Holzinger.1, obs.variables=c("Word.meaning", 
    "Sentence.completion", "Odd.words", "Mixed.arithmetic", "Remainders", 
    "Missing.numbers", "Gloves", "Boots", "Hatchets"))

sem.Holzinger.1 <- sem(model.Holzinger.1, S=R.Holzinger, N=696)
summary(sem.Holzinger.1)

model.Holzinger.2 <- cfa(reference.indicators=FALSE,
    # model allowing correlated factors
    text="
    Verbal: Word.meaning, Sentence.completion, Odd.words
    Arithmetic: Mixed.arithmetic, Remainders, Missing.numbers
    Spatial: Gloves, Boots, Hatchets
    ")

sem.Holzinger.2 <- sem(model.Holzinger.2, S=R.Holzinger, N=696)
summary(sem.Holzinger.2)

anova(sem.Holzinger.1, sem.Holzinger.2) # LR test for uncorrelated factors


# Bollen's industrialization and democracy model (using data set)

head(Bollen)
?Bollen

model.bollen <- specifyEquations(text="
    y1 = 1*Demo60
    y2 = lam2*Demo60
    y3 = lam3*Demo60
    y4 = lam4*Demo60
    y5 = 1*Demo65
    y6 = lam2*Demo65
    y7 = lam3*Demo65
    y8 = lam4*Demo65
    x1 = 1*Indust
    x2 = lam6*Indust
    x3 = lam7*Indust
    c(y1, y5) = theta15
    c(y2, y4) = theta24
    c(y2, y6) = theta26
    c(y3, y7) = theta37
    c(y4, y8) = theta48
    c(y6, y8) = theta68
    Demo60 = gamma11*Indust
    Demo65 = gamma21*Indust + beta21*Demo60
    v(Indust) = phi
    ")

sem.bollen <- sem(model.bollen, data=Bollen)
summary(sem.bollen)
pathDiagram(sem.bollen, edge.labels="values")

summary(sem.bollen, robust=TRUE)  # robust standard errors and tests

round(sqrt(diag(vcov(sem.bollen)))/
        sqrt(diag(vcov(sem.bollen, robust=TRUE))), 2) # compare


# Canadian National Election Study CFA 
#      with ordinal data and bootstrap standard errors

head(CNES)
?CNES

model.cnes <- cfa(reference.indicators=FALSE, text="
    F: MBSA2, MBSA7, MBSA8, MBSA9
    ")
model.cnes
pathDiagram(model.cnes, obs.variables=c("MBSA2", "MBSA7", "MBSA8", "MBSA9"))

library(polycor)
hcor <- function(data) hetcor(data, std.err=FALSE)$correlations
(R.cnes <- hcor(CNES))

summary(sem.cnes <- sem(model.cnes, R.cnes, N=1529))

set.seed(12345) # for reproducibility
system.time(boot.cnes <- bootSem(sem.cnes, R=100, Cov=hcor, data=CNES))    
(boot.summary <- summary(boot.cnes, type="norm"))    
round(sqrt(diag(vcov(sem.cnes)))
    /boot.summary$table[, "Std.Error"], 2)   # compare


# Mental tests CFA with missing data

head(Tests)
nrow(Tests)
?Tests

mod.cfa.tests <- cfa(raw=TRUE, text="
    verbal: x1, x2, x3
    math: y1, y2, y3
    ")
mod.cfa.tests

# FIML estimator:
cfa.tests <- sem(mod.cfa.tests, data=Tests, na.action=na.pass, 
    objective=objectiveFIML, fixed.x="Intercept")
summary(cfa.tests, saturated=TRUE)

# complete-case analysis, ML estimator:
cfa.tests.cc <- sem(mod.cfa.tests, data=Tests, raw=TRUE, fixed.x="Intercept")
library(car)  # for compareCoefs()
compareCoefs(cfa.tests, cfa.tests.cc)

# multiple imputation of missing data (requires patience)

imps <- miSem(mod.cfa.tests, data=Tests, fixed.x="Intercept", 
    raw=TRUE, seed=12345)
summary(imps, digits=2)


# Holzinger-Swineford multi-group CFA

library(MBESS)
data(HS.data)
head(HS.data)
?HS.data

mod.hs <- cfa(text="
    spatial: visual, cubes, paper, flags
    verbal: general, paragrap, sentence, wordc, wordm
    memory: wordr, numberr, figurer, object, numberf, figurew
    math: deduct, numeric, problemr, series, arithmet
    ")

mod.mg <- multigroupModel(mod.hs, groups=c("Female", "Male")) 
class(mod.mg)

sem.mg <- sem(mod.mg, data=HS.data, group="Gender",
    formula = ~ visual + cubes + paper + flags +
        general + paragrap + sentence + wordc + wordm +
        wordr + numberr + figurer + object + numberf + figurew +
        deduct + numeric + problemr + series + arithmet
)
summary(sem.mg)

# with cross-group equality constraints:

mod.mg.eq <- multigroupModel(mod.hs, groups=c("Female", "Male"), allEqual=TRUE)

sem.mg.eq <- sem(mod.mg.eq, data=HS.data, group="Gender",
    formula = ~ visual + cubes + paper + flags +
        general + paragrap + sentence + wordc + wordm +
        wordr + numberr + figurer + object + numberf + figurew +
        deduct + numeric + problemr + series + arithmet
)
summary(sem.mg.eq)

anova(sem.mg, sem.mg.eq) # test equality constraints
BIC(sem.mg)
BIC(sem.mg.eq)

modIndices(sem.mg.eq)