###################################one way random effects model################################################
##install package lme4
library(nlme)
ex.data <- read.table(file="C:/jenn/teaching/stat579/data/influent.txt",header=T)
attach(ex.data)
summary(ex.data)
##     influent        nitrogen          type      
##  Min.   :1.000   Min.   : 2.00   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:14.00   1st Qu.:1.000  
##  Median :3.000   Median :20.00   Median :2.000  
##  Mean   :3.162   Mean   :20.97   Mean   :1.865  
##  3rd Qu.:5.000   3rd Qu.:28.00   3rd Qu.:2.000  
##  Max.   :6.000   Max.   :42.00   Max.   :3.000
nrow(ex.data)
## [1] 37
head(ex.data)
##   influent nitrogen type
## 1        1       21    2
## 2        1       27    2
## 3        1       29    2
## 4        1       17    2
## 5        1       19    2
## 6        1       12    2
nrow(ex.data)
## [1] 37
head(ex.data)
##   influent nitrogen type
## 1        1       21    2
## 2        1       27    2
## 3        1       29    2
## 4        1       17    2
## 5        1       19    2
## 6        1       12    2
##descriptive statistics
tapply(nitrogen,influent,mean)
##        1        2        3        4        5        6 
## 21.55556 13.85714 16.80000 24.50000 14.40000 36.40000
tapply(nitrogen,influent,sd)
##        1        2        3        4        5        6 
## 5.746980 7.358183 4.086563 6.348228 9.502631 5.029911
tapply(nitrogen,influent,length) #unbalanced design
## 1 2 3 4 5 6 
## 9 7 5 6 5 5
#declare grouping variable as a factor
influent.f<-as.factor(influent)
boxplot(nitrogen~influent.f,main="Boxplot",xlab = "Influent",
        ylab = "Nitrogen")

# fit one-way ANOVA, assuming only random error
fit1 <- aov(nitrogen~influent.f)
summary(fit1)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## influent.f   5   1925   385.0   9.044 2.2e-05 ***
## Residuals   31   1320    42.6                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(fit1$resid~fit1$fitted)

qqnorm(fit1$resid)
qqline(fit1$resid)

# fit linear regression model, see the difference and similarities
fit2<-lm(nitrogen ~ influent.f)
anova(fit2) #compare to summary(fit1)
## Analysis of Variance Table
## 
## Response: nitrogen
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## influent.f  5 1925.2  385.04  9.0441 2.202e-05 ***
## Residuals  31 1319.8   42.57                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# fit a random effects model, assuming two nested errors, defaut is REML
fit3 <- lme(nitrogen~1,random=~1|influent.f)
summary(fit3)
## Linear mixed-effects model fit by REML
##  Data: NULL 
##        AIC      BIC    logLik
##   258.3511 263.1017 -126.1756
## 
## Random effects:
##  Formula: ~1 | influent.f
##         (Intercept) Residual
## StdDev:    7.957598 6.531317
## 
## Fixed effects: nitrogen ~ 1 
##                Value Std.Error DF  t-value p-value
## (Intercept) 21.22312  3.429034 31 6.189241       0
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -1.91443594 -0.53645956 -0.03217348  0.83713515  1.95823685 
## 
## Number of Observations: 37
## Number of Groups: 6
intervals(fit3)
## Approximate 95% confidence intervals
## 
##  Fixed effects:
##                lower     est.    upper
## (Intercept) 14.22956 21.22312 28.21668
## attr(,"label")
## [1] "Fixed effects:"
## 
##  Random Effects:
##   Level: influent.f 
##                    lower     est.    upper
## sd((Intercept)) 3.951429 7.957598 16.02544
## 
##  Within-group standard error:
##    lower     est.    upper 
## 5.089555 6.531317 8.381499
fixef(fit3) #overall mean, intercept
## (Intercept) 
##    21.22312
ranef(fit3) #alpha_i hat
##   (Intercept)
## 1    0.309286
## 2   -6.719332
## 3   -3.897945
## 4    2.946104
## 5   -6.012984
## 6   13.374871
yhat<-predict(fit3)
resid<-resid(fit3)
plot(resid~yhat)

par(mfrow=c(2,2))
plot(fit3) #standardized residuals v.s fitted values

qqnorm(fit3$resid) #check normality of \epsilon
qqline(fit3$resid)


#refitting model using ML
fit4 <- lme(nitrogen~1,random=~1|influent.f,method="ML") 
summary(fit4)
## Linear mixed-effects model fit by maximum likelihood
##  Data: NULL 
##       AIC      BIC    logLik
##   262.557 267.3898 -128.2785
## 
## Random effects:
##  Formula: ~1 | influent.f
##         (Intercept) Residual
## StdDev:    7.159246 6.534319
## 
## Fixed effects: nitrogen ~ 1 
##               Value Std.Error DF  t-value p-value
## (Intercept) 21.2171  3.165066 31 6.703524       0
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -1.93438234 -0.55703907 -0.03545401  0.83759580  1.93232419 
## 
## Number of Observations: 37
## Number of Groups: 6
#results should be similar to fit 3 

##using another package lme4, lmerTest
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
## 
##     lmList
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
fit5<-lmer(nitrogen~(1|influent.f), REML=TRUE,data=ex.data)
summary(fit5)
## summary from lme4 is returned
## some computational error has occurred in lmerTest
## Linear mixed model fit by REML ['lmerMod']
## Formula: nitrogen ~ (1 | influent.f)
##    Data: ex.data
## 
## REML criterion at convergence: 252.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.91444 -0.53646 -0.03217  0.83714  1.95824 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  influent.f (Intercept) 63.32    7.958   
##  Residual               42.66    6.531   
## Number of obs: 37, groups:  influent.f, 6
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   21.223      3.429   6.189
anova(fit5,df="Kenward-Roger")
## anova from lme4 is returned
## some computational error has occurred in lmerTest
## Analysis of Variance Table
##      Df Sum Sq Mean Sq F value