Bibliotecas
library(knitr)
library(semPlot)
library(tidyverse)
Instale o lavaan
install.packages("lavaan")
Exemplo do Cap 2 do livro de (Beaujean, 2014)
# Ative o lavaan
library(lavaan)
# Cria uma matriz de correlação
rs <- c(1.0,0.20,1,0.24,0.30,1,0.70,0.80,0.30,1)
R <- lower2full(rs)
# Nomeie as variáveis
colnames(R) <- c("X1", "X2", "X3", "Y")
rownames(R) <- c("X1", "X2", "X3", "Y")
kable(R)
X1 |
1.00 |
0.2 |
0.24 |
0.7 |
X2 |
0.20 |
1.0 |
0.30 |
0.8 |
X3 |
0.24 |
0.3 |
1.00 |
0.3 |
Y |
0.70 |
0.8 |
0.30 |
1.0 |
# Sintaxe do modelo
regr_model <-"
Y ~ a*X1 + b*X2 + c*X3
Y ~~ z*Y
"
# fit the model
fit <- sem(regr_model, sample.cov=R, sample.nobs=1000)
summary(fit, rsquare=TRUE)
## lavaan (0.5-23.1097) converged normally after 25 iterations
##
## Number of observations 1000
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Y ~
## X1 (a) 0.571 0.008 74.539 0.000
## X2 (b) 0.700 0.008 89.724 0.000
## X3 (c) -0.047 0.008 -5.980 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Y (z) 0.054 0.002 22.361 0.000
##
## R-Square:
## Estimate
## Y 0.946
# desenha o modelo (http://sachaepskamp.com/semPlot/examples)
semPaths(fit, whatLabels = "est")
# imprime tabela
kable(
parameterEstimates(fit, standardized=TRUE)[,c(1:3,5:6,12)],
digits = 2
)
Y |
~ |
X1 |
0.57 |
0.01 |
0.57 |
Y |
~ |
X2 |
0.70 |
0.01 |
0.70 |
Y |
~ |
X3 |
-0.05 |
0.01 |
-0.05 |
Y |
~~ |
Y |
0.05 |
0.00 |
0.05 |
X1 |
~~ |
X1 |
1.00 |
0.00 |
1.00 |
X1 |
~~ |
X2 |
0.20 |
0.00 |
0.20 |
X1 |
~~ |
X3 |
0.24 |
0.00 |
0.24 |
X2 |
~~ |
X2 |
1.00 |
0.00 |
1.00 |
X2 |
~~ |
X3 |
0.30 |
0.00 |
0.30 |
X3 |
~~ |
X3 |
1.00 |
0.00 |
1.00 |
Abre base e examina variáveis
# Abre base do enade
load(
url("http://www.labape.com.br/rprimi/SEM/exerc18/bd_enade.RData")
)
# Inspeciona base
names(bd_enade)
## [1] "nt_obj_fg" "nt_dis_fg" "nt_obj_ce" "nt_dis_ce" "cd_catad"
## [6] "ENEM.OBJ.7" "ENEM.RED.7" "in_noturno" "esc_pai7" "esc_mae7"
## [11] "Renda7" "trab_07"
# Descrve variáveis
library(psych)
describe(bd_enade) %>%
kable(digits = 2)
nt_obj_fg |
1 |
6368 |
48.34 |
18.64 |
50.00 |
48.27 |
18.53 |
12.5 |
100.0 |
87.5 |
0.05 |
-0.50 |
0.23 |
nt_dis_fg |
2 |
5598 |
48.53 |
16.15 |
50.00 |
48.79 |
18.53 |
2.5 |
95.0 |
92.5 |
-0.15 |
-0.47 |
0.22 |
nt_obj_ce |
3 |
6470 |
44.16 |
14.68 |
45.00 |
44.10 |
14.83 |
5.0 |
90.0 |
85.0 |
0.01 |
-0.35 |
0.18 |
nt_dis_ce |
4 |
5475 |
26.89 |
15.03 |
25.00 |
26.09 |
17.35 |
1.7 |
83.3 |
81.6 |
0.46 |
-0.38 |
0.20 |
cd_catad |
5 |
6764 |
1.81 |
0.39 |
2.00 |
1.89 |
0.00 |
1.0 |
2.0 |
1.0 |
-1.60 |
0.54 |
0.00 |
ENEM.OBJ.7 |
6 |
6764 |
63.21 |
15.30 |
63.49 |
63.69 |
16.47 |
0.0 |
100.0 |
100.0 |
-0.29 |
-0.44 |
0.19 |
ENEM.RED.7 |
7 |
6659 |
65.27 |
13.14 |
65.00 |
65.42 |
14.83 |
25.0 |
100.0 |
75.0 |
-0.11 |
-0.24 |
0.16 |
in_noturno |
8 |
6764 |
0.60 |
0.49 |
1.00 |
0.62 |
0.00 |
0.0 |
1.0 |
1.0 |
-0.40 |
-1.84 |
0.01 |
esc_pai7 |
9 |
5870 |
3.28 |
2.00 |
4.00 |
3.19 |
2.97 |
0.0 |
7.0 |
7.0 |
0.22 |
-1.14 |
0.03 |
esc_mae7 |
10 |
6071 |
3.58 |
2.04 |
4.00 |
3.53 |
2.97 |
0.0 |
7.0 |
7.0 |
0.07 |
-1.18 |
0.03 |
Renda7 |
11 |
6085 |
3.12 |
1.14 |
3.00 |
3.05 |
1.48 |
1.0 |
7.0 |
6.0 |
0.60 |
0.64 |
0.01 |
trab_07 |
12 |
6034 |
1.66 |
0.70 |
2.00 |
1.57 |
1.48 |
1.0 |
3.0 |
2.0 |
0.59 |
-0.81 |
0.01 |
# Correlaciona
bd_enade %>%
select(nt_obj_ce, ENEM.OBJ.7, cd_catad) %>%
pairs.panels(pch =4, cex=0.5)
Testa modelo IES publica -> enem -> enade
# Modelo cat_adm -> enem -> enade
mod <- "
nt_obj_ce ~ a*publ + c*ENEM.OBJ.7
ENEM.OBJ.7 ~ b*publ
ind:= b*c
"
# Estima modelo
fit <-
bd_enade %>%
mutate(
publ = ifelse(cd_catad==1, 1, 0)
) %>%
sem(mod, data = .)
summary(fit, standardized=TRUE, rsq=TRUE)
## lavaan (0.5-23.1097) converged normally after 35 iterations
##
## Used Total
## Number of observations 6470 6764
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## nt_obj_ce ~
## publ (a) 0.998 0.469 2.127 0.033 0.998 0.025
## ENEM.OBJ.7 (c) 0.418 0.011 37.109 0.000 0.418 0.431
## ENEM.OBJ.7 ~
## publ (b) 11.171 0.498 22.417 0.000 11.171 0.268
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .nt_obj_ce 174.091 3.061 56.877 0.000 174.091 0.808
## .ENEM.OBJ.7 211.654 3.721 56.877 0.000 211.654 0.928
##
## R-Square:
## Estimate
## nt_obj_ce 0.192
## ENEM.OBJ.7 0.072
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ind 4.674 0.244 19.188 0.000 4.674 0.116
# plota modelo
semPaths(fit,
whatLabels = "std",
sizeMan = 12,
shapeMan = "rectangle" ,
edge.label.cex=1.5,
nodeLabels = c("enade", "enem", "publ"),
node.width = 2,
label.prop = .6
)
Testa modelo enem -> IES pública -> enade
# Modelo cat_adm -> enem -> enade
mod <- "
nt_obj_ce ~ a*ENEM.OBJ.7 + c*publ
publ ~ b*ENEM.OBJ.7
ind:= b*c
"
# Estima modelo
fit <-
bd_enade %>%
mutate(
publ = ifelse(cd_catad==1, 1, 0)
) %>%
sem(mod, data = .)
summary(fit, standardized=TRUE, rsq=TRUE)
## lavaan (0.5-23.1097) converged normally after 28 iterations
##
## Used Total
## Number of observations 6470 6764
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## nt_obj_ce ~
## ENEM.OBJ.7 (a) 0.418 0.011 37.109 0.000 0.418 0.431
## publ (c) 0.998 0.469 2.127 0.033 0.998 0.025
## publ ~
## ENEM.OBJ.7 (b) 0.006 0.000 22.417 0.000 0.006 0.268
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .nt_obj_ce 174.091 3.061 56.877 0.000 174.091 0.808
## .publ 0.122 0.002 56.877 0.000 0.122 0.928
##
## R-Square:
## Estimate
## nt_obj_ce 0.192
## publ 0.072
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ind 0.006 0.003 2.117 0.034 0.006 0.007
parameterEstimates(fit, standardized=TRUE) %>%
kable(digits = 2)
nt_obj_ce |
~ |
ENEM.OBJ.7 |
a |
0.42 |
0.01 |
37.11 |
0.00 |
0.40 |
0.44 |
0.42 |
0.43 |
0.03 |
nt_obj_ce |
~ |
publ |
c |
1.00 |
0.47 |
2.13 |
0.03 |
0.08 |
1.92 |
1.00 |
0.02 |
0.02 |
publ |
~ |
ENEM.OBJ.7 |
b |
0.01 |
0.00 |
22.42 |
0.00 |
0.01 |
0.01 |
0.01 |
0.27 |
0.02 |
nt_obj_ce |
~~ |
nt_obj_ce |
|
174.09 |
3.06 |
56.88 |
0.00 |
168.09 |
180.09 |
174.09 |
0.81 |
0.81 |
publ |
~~ |
publ |
|
0.12 |
0.00 |
56.88 |
0.00 |
0.12 |
0.13 |
0.12 |
0.93 |
0.93 |
ENEM.OBJ.7 |
~~ |
ENEM.OBJ.7 |
|
228.09 |
0.00 |
NA |
NA |
228.09 |
228.09 |
228.09 |
1.00 |
228.09 |
ind |
:= |
b*c |
ind |
0.01 |
0.00 |
2.12 |
0.03 |
0.00 |
0.01 |
0.01 |
0.01 |
0.00 |
# plota modelo
semPaths(fit,
whatLabels = "std",
sizeMan = 12,
shapeMan = "rectangle" ,
edge.label.cex=1.5,
node.width = 2,
nodeLabels = c("enade", "publ", "enem"),
label.prop = .6
)
Referências
Beaujean, A. A. (2014). Latent Variable Modeling Using R: A Step-By-Step Guide (Edição: 1.). New York: Routledge.