Análise de variância fatorial
- Comparação de médias n grupos em delineamentos fatoriais com mais de uma variável independente ou de grupo
- Pergunta de pesquisa: há efeitos principais ? Há efeitos de interação entre as variáveis interativos (o efeito de uma VI muda nos subníveis da outra)
Prática com o senna v1 (two way ANOVA): Há diferenças em auto gestão entre alunos de séries diferentes ? Essa diferença é a mesma entre homes e mulheres? ANOVA 3 X 2 (Escolaridade X Sexo)
# Abrir banco de dados
load("senna.RData")
# Cria escolaridade e sexo como uma variável "factor"
sennav1$esc2 <- factor(sennav1$ESCOLARIDADE)
sennav1$sexo2 <- factor(sennav1$SEXO)
# ANOVA VD: auto gestão VI's: escolaridade e sexo
fit <- aov(F1.Cons ~ esc2*sexo2, data = sennav1)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## esc2 2 13.239 6.619 12.904 2.18e-05 ***
## sexo2 1 0.066 0.066 0.129 0.7207
## esc2:sexo2 2 3.290 1.645 3.207 0.0475 *
## Residuals 60 30.779 0.513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Comparações post-hoc
TukeyHSD(fit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = F1.Cons ~ esc2 * sexo2, data = sennav1)
##
## $esc2
## diff lwr upr p adj
## 7-5 -0.5942266 -1.108546 -0.07990716 0.0197400
## 9-5 -1.1221600 -1.653347 -0.59097316 0.0000118
## 9-7 -0.5279334 -1.042253 -0.01361397 0.0429873
##
## $sexo2
## diff lwr upr p adj
## 1-0 0.06220468 -0.2904923 0.4149017 0.7254824
##
## $`esc2:sexo2`
## diff lwr upr p adj
## 7:0-5:0 -0.20508351 -1.09407169 0.683904656 0.9836108
## 9:0-5:0 -0.48148148 -1.47540047 0.512437509 0.7112231
## 5:1-5:0 0.66975309 -0.25997300 1.599479170 0.2908978
## 7:1-5:0 -0.22222222 -1.21614121 0.771696769 0.9857579
## 9:1-5:0 -0.93291576 -1.86264184 -0.003189675 0.0487391
## 9:0-7:0 -0.27639797 -1.16538614 0.612590205 0.9411012
## 5:1-7:0 0.87483660 0.05824882 1.691424379 0.0289064
## 7:1-7:0 -0.01713871 -0.90612688 0.871849464 0.9999999
## 9:1-7:0 -0.72783224 -1.54442002 0.088755533 0.1072588
## 5:1-9:0 1.15123457 0.22150848 2.080960651 0.0070750
## 7:1-9:0 0.25925926 -0.73465973 1.253178250 0.9718564
## 9:1-9:0 -0.45143428 -1.38116036 0.478291806 0.7092112
## 7:1-5:1 -0.89197531 -1.82170139 0.037750775 0.0672062
## 9:1-5:1 -1.60266885 -2.46342794 -0.741909750 0.0000128
## 9:1-7:1 -0.71069354 -1.64041962 0.219032547 0.2308620
# Alguns programas em: http://www.personality-project.org/r/r.anova.html
print(model.tables(fit,"means"),digits=3)
## Tables of means
## Grand mean
##
## 3.500941
##
## esc2
## 5 7 9
## 4.07 3.48 2.95
## rep 21.00 24.00 21.00
##
## sexo2
## 0 1
## 3.47 3.53
## rep 33.00 33.00
##
## esc2:sexo2
## sexo2
## esc2 0 1
## 5 3.69 4.36
## rep 9.00 12.00
## 7 3.49 3.47
## rep 15.00 9.00
## 9 3.21 2.76
## rep 9.00 12.00
boxplot(F1.Cons ~ esc2*sexo2,data=sennav1 )
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
plotmeans(F1.Cons ~ interaction(esc2, sexo2, sep=" "),
connect=list(c(1,2,3),c(4,5,6)),
data = sennav1)
library(HH)
## Loading required package: lattice
## Loading required package: grid
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## Loading required package: gridExtra
##
## Attaching package: 'HH'
## The following object is masked from 'package:gplots':
##
## residplot
interaction2wt(F1.Cons ~ esc2*sexo2, data = sennav1)
Prática com o senna v1 (two way ANOVA): Há diferenças em auto gestão entre alunos de séries diferentes? Essa diferença é a mesma entre alunos com desemenho acima ou abaixo da mediana ? ANOVA 3 X 2 ( Escolaridade X Desempenho)
# Cria uma divisão pela mediana
sennav1$m_notas2 <-cut(sennav1$m_notas,
breaks =c(0, median(sennav1$m_notas, na.rm =TRUE), 10))
sennav1$sexo2 <- factor(sennav1$SEXO)
# ANOVA VD: auto gestão VI's: escolaridade e sexo
fit <- aov(F1.Cons ~ esc2*m_notas2, data = sennav1)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## esc2 2 13.228 6.614 13.446 1.54e-05 ***
## m_notas2 1 2.413 2.413 4.906 0.0306 *
## esc2:m_notas2 2 2.599 1.300 2.642 0.0796 .
## Residuals 59 29.020 0.492
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Comparações post-hoc
TukeyHSD(fit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = F1.Cons ~ esc2 * m_notas2, data = sennav1)
##
## $esc2
## diff lwr upr p adj
## 7-5 -0.58061 -1.089540 -0.07168007 0.0216407
## 9-5 -1.12216 -1.642528 -0.60179196 0.0000082
## 9-7 -0.54155 -1.050480 -0.03262000 0.0345090
##
## $m_notas2
## diff lwr upr p adj
## (7.33,10]-(0,7.33] 0.3617318 0.01355727 0.7099064 0.0419823
##
## $`esc2:m_notas2`
## diff lwr upr p adj
## 7:(0,7.33]-5:(0,7.33] -0.01388889 -1.04675554 1.0189778 1.0000000
## 9:(0,7.33]-5:(0,7.33] -0.37843137 -1.37627536 0.6194126 0.8723959
## 5:(7.33,10]-5:(0,7.33] 1.01111111 0.01326712 2.0089551 0.0452725
## 7:(7.33,10]-5:(0,7.33] 0.31124975 -0.73714960 1.3596491 0.9511430
## 9:(7.33,10]-5:(0,7.33] -0.45370370 -1.64635539 0.7389480 0.8709519
## 9:(0,7.33]-7:(0,7.33] -0.36454248 -1.16459755 0.4355126 0.7604845
## 5:(7.33,10]-7:(0,7.33] 1.02500000 0.22494493 1.8250551 0.0048276
## 7:(7.33,10]-7:(0,7.33] 0.32513864 -0.53714710 1.1874244 0.8750733
## 9:(7.33,10]-7:(0,7.33] -0.43981481 -1.47268147 0.5930518 0.8081073
## 5:(7.33,10]-9:(0,7.33] 1.38954248 0.63524333 2.1438416 0.0000164
## 7:(7.33,10]-9:(0,7.33] 0.68968112 -0.13032851 1.5096908 0.1476458
## 9:(7.33,10]-9:(0,7.33] -0.07527233 -1.07311632 0.9225717 0.9999217
## 7:(7.33,10]-5:(7.33,10] -0.69986136 -1.51987099 0.1201483 0.1366823
## 9:(7.33,10]-5:(7.33,10] -1.46481481 -2.46265880 -0.4669708 0.0008166
## 9:(7.33,10]-7:(7.33,10] -0.76495346 -1.81335281 0.2834459 0.2771982
# Figuras
print(model.tables(fit,"means"),digits=3)
## Tables of means
## Grand mean
##
## 3.506083
##
## esc2
## 5 7 9
## 4.07 3.49 2.95
## rep 21.00 23.00 21.00
##
## m_notas2
## (0,7.33] (7.33,10]
## 3.33 3.69
## rep 33.00 32.00
##
## esc2:m_notas2
## m_notas2
## esc2 (0,7.33] (7.33,10]
## 5 3.35 4.36
## rep 6.00 15.00
## 7 3.34 3.66
## rep 12.00 11.00
## 9 2.97 2.90
## rep 15.00 6.00
boxplot(F1.Cons ~ esc2*m_notas2,data=sennav1)
interaction2wt(F1.Cons ~ esc2*m_notas2, data = sennav1)
Análise de regressão múltipla? Qual contribuição única para predizer notas das variáveis socioemocionais ?
# Correlação entre notas e habilidades socioemocionais
library(sjPlot)
sjt.corr(sennav1[ , c("s_faltas", "m_notas", "F1.Cons", "F2.Extr", "F3.EmSt", "F4.Agre", "F5.Opns", "F6.NVLoc")] , triangle = "lower")
# Regressão múltipla VD=Notas e VI's variáveis do Senna
fit2 <- lm( m_notas~F1.Cons+F2.Extr+F3.EmSt+F4.Agre+F5.Opns+F6.NVLoc, data=sennav1)
summary(fit2)
##
## Call:
## lm(formula = m_notas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre +
## F5.Opns + F6.NVLoc, data = sennav1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.40730 -0.53414 0.07799 0.61265 2.09773
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.72321 1.11784 4.225 8.54e-05 ***
## F1.Cons 0.85598 0.24183 3.540 0.000797 ***
## F2.Extr -0.07189 0.25169 -0.286 0.776194
## F3.EmSt -0.44010 0.23712 -1.856 0.068537 .
## F4.Agre 0.85238 0.26088 3.267 0.001826 **
## F5.Opns -0.40424 0.25114 -1.610 0.112914
## F6.NVLoc -0.16230 0.25627 -0.633 0.529008
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9748 on 58 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3861, Adjusted R-squared: 0.3226
## F-statistic: 6.08 on 6 and 58 DF, p-value: 5.513e-05
# Regressão múltipla VD=Frequência e VI's variáveis do Senna
fit3 <- lm( s_faltas~F1.Cons+F2.Extr+F3.EmSt+F4.Agre+F5.Opns+F6.NVLoc, data=sennav1)
summary(fit3)
##
## Call:
## lm(formula = s_faltas ~ F1.Cons + F2.Extr + F3.EmSt + F4.Agre +
## F5.Opns + F6.NVLoc, data = sennav1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.074 -12.285 -2.660 9.783 44.342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.786 19.092 3.812 0.000331 ***
## F1.Cons -13.230 4.109 -3.220 0.002085 **
## F2.Extr -3.470 4.305 -0.806 0.423382
## F3.EmSt 4.461 4.021 1.109 0.271822
## F4.Agre -6.334 4.458 -1.421 0.160688
## F5.Opns 6.987 4.281 1.632 0.108001
## F6.NVLoc -5.234 4.374 -1.196 0.236288
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.68 on 59 degrees of freedom
## Multiple R-squared: 0.2506, Adjusted R-squared: 0.1744
## F-statistic: 3.289 on 6 and 59 DF, p-value: 0.007376
sjp.lm(fit2, type = "std")
sjp.lm(fit3, type = "std")
VD: notas
|
|
m_notas
|
|
|
B
|
CI
|
std. Beta
|
CI
|
p
|
(Intercept)
|
|
4.72
|
2.49 – 6.96
|
|
|
<.001
|
F1.Cons
|
|
0.86
|
0.37 – 1.34
|
0.62
|
0.28 – 0.96
|
.001
|
F2.Extr
|
|
-0.07
|
-0.58 – 0.43
|
-0.03
|
-0.26 – 0.20
|
.776
|
F3.EmSt
|
|
-0.44
|
-0.91 – 0.03
|
-0.30
|
-0.61 – 0.02
|
.069
|
F4.Agre
|
|
0.85
|
0.33 – 1.37
|
0.41
|
0.17 – 0.66
|
.002
|
F5.Opns
|
|
-0.40
|
-0.91 – 0.10
|
-0.22
|
-0.48 – 0.05
|
.113
|
F6.NVLoc
|
|
-0.16
|
-0.68 – 0.35
|
-0.08
|
-0.33 – 0.17
|
.529
|
Observations
|
|
65
|
R2 / adj. R2
|
|
.386 / .323
|
VD: faltas
|
|
s_faltas
|
|
|
B
|
CI
|
std. Beta
|
CI
|
p
|
(Intercept)
|
|
72.79
|
34.58 – 110.99
|
|
|
<.001
|
F1.Cons
|
|
-13.23
|
-21.45 – -5.01
|
-0.62
|
-0.99 – -0.24
|
.002
|
F2.Extr
|
|
-3.47
|
-12.08 – 5.14
|
-0.10
|
-0.35 – 0.15
|
.423
|
F3.EmSt
|
|
4.46
|
-3.59 – 12.51
|
0.19
|
-0.15 – 0.54
|
.272
|
F4.Agre
|
|
-6.33
|
-15.25 – 2.59
|
-0.20
|
-0.47 – 0.07
|
.161
|
F5.Opns
|
|
6.99
|
-1.58 – 15.55
|
0.24
|
-0.05 – 0.53
|
.108
|
F6.NVLoc
|
|
-5.23
|
-13.99 – 3.52
|
-0.17
|
-0.44 – 0.11
|
.236
|
Observations
|
|
66
|
R2 / adj. R2
|
|
.251 / .174
|
Estudo de padrões desenvolvimentais e auto gestão: revisitando a relação entre nota, escolaridade e auto gestão usando regressão múltipla
|
F1.Cons
|
m_notas
|
IDADE
|
F1.Cons
|
|
|
|
m_notas
|
0.495***
|
|
|
IDADE
|
-0.504***
|
-0.378**
|
|
Computed correlation used spearman-method with listwise-deletion.
|
|
F4.Agre
|
m_notas
|
IDADE
|
F4.Agre
|
|
|
|
m_notas
|
0.424***
|
|
|
IDADE
|
-0.277*
|
-0.378**
|
|
Computed correlation used spearman-method with listwise-deletion.
|
# Correlação entre notas e habilidades socioemocionais
library(sjPlot)
sjt.corr(sennav1[ , c("F1.Cons", "m_notas", "IDADE")], triangle="lower")
sjt.corr(sennav1[ , c("F4.Agre", "m_notas", "IDADE")], triangle="lower")
# Cria escolaridade como uma variável "factor"
fit4 <- lm(F1.Cons~IDADE*m_notas2, data=sennav1)
summary(fit4) # show results
##
## Call:
## lm(formula = F1.Cons ~ IDADE * m_notas2, data = sennav1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.58980 -0.37807 0.02131 0.53516 1.68798
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.44198 1.09643 4.051 0.000146 ***
## IDADE -0.09980 0.08583 -1.163 0.249448
## m_notas2(7.33,10] 4.99861 1.64481 3.039 0.003492 **
## IDADE:m_notas2(7.33,10] -0.37367 0.13429 -2.783 0.007167 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6918 on 61 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3822, Adjusted R-squared: 0.3518
## F-statistic: 12.58 on 3 and 61 DF, p-value: 1.668e-06
sjp.lm(fit4, type = "std")
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
##
## layer
ggplot(data=sennav1, aes(x=IDADE, y=F1.Cons, color = m_notas2)) + geom_point() + geom_smooth(method="lm", se=FALSE)
|
|
F1.Cons
|
|
|
B
|
CI
|
std. Beta
|
CI
|
p
|
(Intercept)
|
|
4.44
|
2.25 – 6.63
|
|
|
<.001
|
IDADE
|
|
-0.10
|
-0.27 – 0.07
|
-0.16
|
-0.43 – 0.11
|
.249
|
m_notas2(7.33,10]
|
|
5.00
|
1.71 – 8.29
|
2.93
|
1.04 – 4.82
|
.003
|
IDADE:m_notas2(7.33,10]
|
|
-0.37
|
-0.64 – -0.11
|
-2.61
|
-4.45 – -0.77
|
.007
|
Observations
|
|
65
|
R2 / adj. R2
|
|
.382 / .352
|
Estudo dos coeficientes não padronizados e padronizados na regressão múltipla ?
# Separa base so com as variáveis de interesse
dt <- sennav1[ , c("m_notas", "F1.Cons", "F2.Extr",
"F3.EmSt", "F4.Agre", "F5.Opns", "F6.NVLoc")]
# Roda regressão múltipla
fit2 <- lm( m_notas~F1.Cons+F2.Extr+F3.EmSt+F4.Agre+F5.Opns+F6.NVLoc, data=dt)
# sjt.lm(fit2, showStdBeta = TRUE)
# Estatísticas descritivas
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:HH':
##
## logit
describe(dt)
## vars n mean sd median trimmed mad min max range skew
## m_notas 1 65 7.33 1.18 7.33 7.33 1.27 4.67 9.88 5.21 0.00
## F1.Cons 2 66 3.50 0.85 3.36 3.51 0.86 1.22 5.00 3.78 -0.17
## F2.Extr 3 66 3.41 0.54 3.37 3.40 0.53 2.14 5.00 2.86 0.26
## F3.EmSt 4 66 3.30 0.80 3.16 3.32 1.07 1.12 4.90 3.78 -0.16
## F4.Agre 5 66 3.53 0.57 3.50 3.51 0.54 2.25 4.83 2.58 0.23
## F5.Opns 6 66 3.26 0.64 3.31 3.30 0.64 1.62 4.50 2.88 -0.49
## F6.NVLoc 7 66 2.39 0.58 2.27 2.34 0.44 1.50 4.38 2.88 0.93
## kurtosis se
## m_notas -0.66 0.15
## F1.Cons -0.27 0.11
## F2.Extr 0.07 0.07
## F3.EmSt -0.63 0.10
## F4.Agre -0.35 0.07
## F5.Opns 0.18 0.08
## F6.NVLoc 0.71 0.07
# Transforma variáveis em z e mostra estatísticas descritivas
dtz <- as.data.frame(scale(dt))
describe(dtz)
## vars n mean sd median trimmed mad min max range skew
## m_notas 1 65 0 1 0.01 0.00 1.07 -2.24 2.15 4.40 0.00
## F1.Cons 2 66 0 1 -0.16 0.02 1.01 -2.67 1.76 4.43 -0.17
## F2.Extr 3 66 0 1 -0.07 -0.02 0.97 -2.33 2.92 5.25 0.26
## F3.EmSt 4 66 0 1 -0.18 0.02 1.34 -2.73 2.01 4.74 -0.16
## F4.Agre 5 66 0 1 -0.05 -0.03 0.95 -2.24 2.28 4.52 0.23
## F5.Opns 6 66 0 1 0.07 0.05 1.01 -2.58 1.95 4.53 -0.49
## F6.NVLoc 7 66 0 1 -0.19 -0.08 0.77 -1.53 3.42 4.95 0.93
## kurtosis se
## m_notas -0.66 0.12
## F1.Cons -0.27 0.12
## F2.Extr 0.07 0.12
## F3.EmSt -0.63 0.12
## F4.Agre -0.35 0.12
## F5.Opns 0.18 0.12
## F6.NVLoc 0.71 0.12
fit3 <- lm( m_notas~F1.Cons+F2.Extr+F3.EmSt+F4.Agre+F5.Opns+F6.NVLoc, data=dtz)
sjt.lm(fit3, show.std = TRUE)
# install.packages("ggExtra")
library(ggplot2)
library(ggExtra)
#
p <- ggplot(data=dt, aes(x=F1.Cons, y=m_notas)) +
geom_point() + geom_smooth(method="lm") +
labs(title="SENNA v1.0", x="Auto Gestão", y="Média das Notas") +
scale_y_continuous(breaks=seq(1, 10, 1)) +
scale_x_continuous(breaks=seq(1, 5, 0.5)) +
geom_hline(yintercept=c(7.33, 7.33+1.18, 7.33-1.18),
colour="green", linetype="longdash") +
geom_vline(xintercept=c(3.50, 3.50+0.85, 3.50-0.85),
colour="red", linetype="longdash")
ggMarginal(p, type = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p2 <- ggplot(data=dtz, aes(x=F1.Cons, y=m_notas)) +
geom_point() + geom_smooth(method="lm") +
labs(title="SENNA v1.0", x="Auto Gestão", y="Média das Notas") +
scale_y_continuous(breaks=seq(-3, 3, 0.5)) +
scale_x_continuous(breaks=seq(-3, 3, 0.5)) +
geom_hline(yintercept=c(0, 1, -1),
colour="green", linetype="longdash") +
geom_vline(xintercept=c(0, 1, -1),
colour="red", linetype="longdash")
ggMarginal(p2, type = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Visualização da regressão múltipla
# http://www.statmethods.net/graphs/scatterplot.html
# 3D Scatterplot with Coloring and Vertical Lines
# and Regression Plane
# install.packages("scatterplot3d")
library(scatterplot3d)
attach(dtz)
fit <- lm( m_notas~F1.Cons+F4.Agre)
s3d <-scatterplot3d(F1.Cons, F4.Agre, m_notas, pch=16,
highlight.3d=TRUE, type="h")
s3d$plane3d(fit)