# Set the working directory so that the relative path passed to load() further
# down resolves against this folder.
# NOTE(review): the path is specific to one user's machine; the script will
# stop here on any computer without this Dropbox folder. Consider opening the
# analysis from a project directory instead of calling setwd() in the script.
setwd("~/Dropbox/R Stat")

Análise de variância simples

Estruturas de dados no R

Prática com o senna v1: Há diferenças em autogestão entre alunos de séries diferentes?

# Open the data set: restore the objects stored in senna.RData into the
# workspace (the rest of the script expects a data frame named sennav1;
# presumably it comes from this file).
load(file = "senna.RData")

# Descriptive statistics
library(psych)

# The six score columns are selected by name instead of by position
# (previously sennav1[ , 24:29]). The names are the row labels psych prints
# in the output below, so the result is unchanged, but the selection now
# fails loudly instead of silently describing the wrong variables if columns
# are ever added to or reordered in the data set.
factor_cols <- c(
  "F1.Cons", "F2.Extr", "F3.EmSt",
  "F4.Agre", "F5.Opns", "F6.NVLoc"
)

# Summary statistics for the whole sample
describe(sennav1[, factor_cols])
##          vars  n mean   sd median trimmed  mad  min  max range  skew
## F1.Cons     1 66 3.50 0.85   3.36    3.51 0.86 1.22 5.00  3.78 -0.17
## F2.Extr     2 66 3.41 0.54   3.37    3.40 0.53 2.14 5.00  2.86  0.26
## F3.EmSt     3 66 3.30 0.80   3.16    3.32 1.07 1.12 4.90  3.78 -0.16
## F4.Agre     4 66 3.53 0.57   3.50    3.51 0.54 2.25 4.83  2.58  0.23
## F5.Opns     5 66 3.26 0.64   3.31    3.30 0.64 1.62 4.50  2.88 -0.49
## F6.NVLoc    6 66 2.39 0.58   2.27    2.34 0.44 1.50 4.38  2.88  0.93
##          kurtosis   se
## F1.Cons     -0.27 0.11
## F2.Extr      0.07 0.07
## F3.EmSt     -0.63 0.10
## F4.Agre     -0.35 0.07
## F5.Opns      0.18 0.08
## F6.NVLoc     0.71 0.07
# The same summary, computed separately for each value of ESCOLARIDADE
describeBy(sennav1[, factor_cols], group = sennav1$ESCOLARIDADE)
## group: 5
##          vars  n mean   sd median trimmed  mad  min  max range  skew
## F1.Cons     1 21 4.07 0.83   4.44    4.16 0.82 2.11 5.00  2.89 -0.61
## F2.Extr     2 21 3.61 0.51   3.67    3.63 0.49 2.56 4.56  2.00 -0.25
## F3.EmSt     3 21 3.59 0.86   3.90    3.61 1.19 2.20 4.90  2.70 -0.29
## F4.Agre     4 21 3.74 0.46   3.60    3.71 0.44 3.00 4.70  1.70  0.48
## F5.Opns     5 21 3.37 0.79   3.38    3.45 0.74 1.62 4.50  2.88 -0.69
## F6.NVLoc    6 21 2.38 0.72   2.12    2.27 0.37 1.62 4.38  2.75  1.21
##          kurtosis   se
## F1.Cons     -0.72 0.18
## F2.Extr     -0.92 0.11
## F3.EmSt     -1.41 0.19
## F4.Agre     -0.78 0.10
## F5.Opns     -0.24 0.17
## F6.NVLoc     0.79 0.16
## -------------------------------------------------------- 
## group: 7
##          vars  n mean   sd median trimmed  mad  min  max range  skew
## F1.Cons     1 24 3.48 0.56   3.36    3.48 0.62 2.61 4.33  1.72  0.08
## F2.Extr     2 24 3.41 0.55   3.35    3.36 0.41 2.43 5.00  2.57  0.94
## F3.EmSt     3 24 3.38 0.66   3.38    3.37 0.74 2.38 4.56  2.19  0.11
## F4.Agre     4 24 3.49 0.59   3.50    3.46 0.62 2.50 4.75  2.25  0.36
## F5.Opns     5 24 3.35 0.44   3.31    3.35 0.57 2.46 4.23  1.77 -0.07
## F6.NVLoc    6 24 2.41 0.59   2.30    2.39 0.68 1.50 3.50  2.00  0.36
##          kurtosis   se
## F1.Cons     -1.38 0.11
## F2.Extr      1.19 0.11
## F3.EmSt     -1.26 0.13
## F4.Agre     -0.59 0.12
## F5.Opns     -0.77 0.09
## F6.NVLoc    -1.16 0.12
## -------------------------------------------------------- 
## group: 9
##          vars  n mean   sd median trimmed  mad  min  max range  skew
## F1.Cons     1 21 2.95 0.81   2.94    2.97 0.41 1.22 4.50  3.28 -0.19
## F2.Extr     2 21 3.21 0.51   3.21    3.22 0.42 2.14 4.07  1.93 -0.10
## F3.EmSt     3 21 2.93 0.76   3.00    2.96 0.83 1.12 4.06  2.94 -0.39
## F4.Agre     4 21 3.37 0.61   3.33    3.34 0.72 2.25 4.83  2.58  0.40
## F5.Opns     5 21 3.07 0.63   3.15    3.07 0.46 1.69 4.23  2.54 -0.22
## F6.NVLoc    6 21 2.36 0.42   2.30    2.35 0.44 1.60 3.10  1.50  0.34
##          kurtosis   se
## F1.Cons     -0.13 0.18
## F2.Extr     -0.81 0.11
## F3.EmSt     -0.50 0.17
## F4.Agre     -0.34 0.13
## F5.Opns     -0.50 0.14
## F6.NVLoc    -1.09 0.09
# Store ESCOLARIDADE as a factor in a new column, esc2, so that the models
# below treat it as a grouping variable rather than as a number.
sennav1[["esc2"]] <- factor(sennav1[["ESCOLARIDADE"]])

# One-way ANOVA.
#   dependent variable:   F1.Cons (the self-management score)
#   independent variable: esc2 (ESCOLARIDADE as a factor)
fit <- aov(
  formula = F1.Cons ~ esc2,
  data = sennav1
)
# Print the ANOVA table (Df, sums of squares, F value, p-value)
summary(fit)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## esc2         2  13.24   6.619   12.22 3.28e-05 ***
## Residuals   63  34.14   0.542                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Figura comparando as médias
library(gplots)
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
# Plot the F1.Cons mean of each esc2 group, with interval bars and with the
# interval limits and the means printed on the figure (two digits).
plotmeans(
  F1.Cons ~ esc2,
  data = sennav1,
  bars = TRUE,
  ci.label = TRUE,
  mean.labels = TRUE,
  digits = 2
)

# Post-hoc pairwise comparisons between the esc2 groups of the ANOVA model,
# using Tukey's HSD at the 95% family-wise confidence level.
TukeyHSD(fit, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = F1.Cons ~ esc2, data = sennav1)
## 
## $esc2
##           diff       lwr           upr     p adj
## 7-5 -0.5942266 -1.122176 -6.627670e-02 0.0237294
## 9-5 -1.1221600 -1.667424 -5.768957e-01 0.0000179
## 9-7 -0.5279334 -1.055883  1.648671e-05 0.0500088

Há diferenças entre gêneros (teste t)?

# Welch two-sample t-test: compares the F1.Cons mean between the two SEXO
# groups.
# The factor column sennav1$esc2 is no longer rebuilt here: it was already
# created before the ANOVA and none of the t-tests use it, so the repeated
# assignment (and its copy-pasted comment) was dead code.
t.test(sennav1$F1.Cons ~ sennav1$SEXO)
## 
##  Welch Two Sample t-test
## 
## data:  sennav1$F1.Cons by sennav1$SEXO
## t = -0.3224, df = 54.839, p-value = 0.7484
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4923841  0.3559214
## sample estimates:
## mean in group 0 mean in group 1 
##        3.466825        3.535056
# Welch two-sample t-test: compares the m_notas mean between the two SEXO
# groups (m_notas is presumably the mean school grade; confirm in the codebook).
t.test(sennav1$m_notas ~ sennav1$SEXO)
## 
##  Welch Two Sample t-test
## 
## data:  sennav1$m_notas by sennav1$SEXO
## t = -2.664, df = 62.43, p-value = 0.009814
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.3103333 -0.1869573
## sample estimates:
## mean in group 0 mean in group 1 
##        6.956650        7.705295
# Welch two-sample t-test: compares the F3.EmSt mean between the two SEXO
# groups.
t.test(sennav1$F3.EmSt ~ sennav1$SEXO)
## 
##  Welch Two Sample t-test
## 
## data:  sennav1$F3.EmSt by sennav1$SEXO
## t = 0.95016, df = 60.293, p-value = 0.3458
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2060150  0.5788866
## sample estimates:
## mean in group 0 mean in group 1 
##        3.395905        3.209470

Análise de regressão?

# Simple linear regression: m_notas predicted from F1.Cons.
# Rows with a missing value in either variable are dropped by lm().
fit2 <- lm(m_notas ~ F1.Cons, data = sennav1)
summary(fit2) # show results
## 
## Call:
## lm(formula = m_notas ~ F1.Cons, data = sennav1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6174 -0.6358  0.0137  0.6415  3.4231 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.9825     0.5480   9.092 4.49e-13 ***
## F1.Cons       0.6682     0.1519   4.400 4.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.044 on 63 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.235,  Adjusted R-squared:  0.2229 
## F-statistic: 19.36 on 1 and 63 DF,  p-value: 4.267e-05
# Per-observation predictions and residuals of the regression model.
# These calls must receive fit2 (the lm fit of m_notas ~ F1.Cons). They were
# previously given fit, the one-way ANOVA model, so they returned only the
# three group means of F1.Cons and the deviations from them.
fitted(fit2) # predicted values
residuals(fit2) # residuals
## NOTE: no output is listed here. The values previously recorded at this
## point were produced from the ANOVA model `fit` (66 observations, three
## distinct fitted values) and do not describe `fit2`. Re-run the two calls
## above to regenerate the output for the regression model.
# Plot the regression model fit2 using the sjPlot package.
# NOTE(review): sjp.lm() appears to have been superseded by plot_model() in
# newer sjPlot releases; confirm against the installed version before
# re-running.
library(sjPlot)
# type = "std": presumably plots the standardized coefficients (TODO confirm)
sjp.lm(fit2, type = "std")

# type = "pred": presumably plots the predictor against the response
# (TODO confirm)
sjp.lm(fit2, type = "pred")
                 Model 1
                 B      CI            std. Beta   CI            p
(Intercept)      4.98   3.89 – 6.08                             <.001
F1.Cons          0.67   0.36 – 0.97   0.48        0.27 – 0.70   <.001
Observations     65
R2 / adj. R2     .235 / .223

Simulação via shiny

Exercício 3