############Example on three-way ANOVA##########################
################################################################
# This dataset adapt.txt is from an experiment on how plants adapt to
# cold climates. The investigators decided to study this problem after
# observing that plants that have been conditioned to cold previously
# appear to suffer less damage from the cold. Two species of potato
# were studied (species 1 and 2). Each plant was exposed to one of two
# acclimatization regimes (1= plant was kept in cold room; 0= plant
# was kept at room temperature) for several days. Later, plants were
# subjected to one of two cold temperatures (-4 degrees C is coded as
# 1; -8 degrees C is coded as 2). Two responses were measured: damage
# score for photosynthesis (photo), and damage score for ion leakage
# (leak).
# Use score for photosynthesis (photo) to be the response variable. Some of the 80 plants
# originally assigned to the treatment combinations were lost during
# the experiment period. Analyze the data from the plants that made it
# through, and assess the effects of the three experimental factors
# species, regime, and temperature on the response "photo".
potato<-read.table("~/Desktop/jenn/teaching/stat445545/data/adapt.txt",header=TRUE)
n<-nrow(potato)
n
## [1] 75
potato[1:10,]
## variety regime temp photo leak
## 1 1 0 1 4.6386 2.25
## 2 1 0 1 5.7914 4.34
## 3 1 0 1 29.3515 4.25
## 4 1 0 1 18.4173 6.14
## 5 1 0 1 2.2556 2.38
## 6 1 0 2 8.2358 16.30
## 7 1 0 2 8.1972 5.24
## 8 1 0 2 -2.4482 3.25
## 9 1 0 2 5.2820 0.88
## 10 1 0 2 -5.1283 3.98
potato$regime<-factor(potato$regime,label=c("R","C"))
potato$variety<-factor(potato$variety,label=c("s1","s2"))
potato$temp<-factor(potato$temp,label=c("-4","-8"))
attach(potato)
potato[1:10,]
## variety regime temp photo leak
## 1 s1 R -4 4.6386 2.25
## 2 s1 R -4 5.7914 4.34
## 3 s1 R -4 29.3515 4.25
## 4 s1 R -4 18.4173 6.14
## 5 s1 R -4 2.2556 2.38
## 6 s1 R -8 8.2358 16.30
## 7 s1 R -8 8.1972 5.24
## 8 s1 R -8 -2.4482 3.25
## 9 s1 R -8 5.2820 0.88
## 10 s1 R -8 -5.1283 3.98
#summary statistics
tapply(photo,regime,mean) #plants previously placed in cold adapt to cold weather better
## R C
## 23.25246 12.47348
tapply(photo,variety, mean) #variety means, variety 1 seems adapt to cold better
## s1 s2
## 12.06579 22.53129
tapply(photo,temp, mean) #plants later put in -4 or -8 C doesn't make much difference in mean damage score
## -4 -8
## 18.32070 16.99179
aggregate(photo~variety+regime+temp, data=potato, mean) #cell means
## variety regime temp photo
## 1 s1 R -4 12.090880
## 2 s2 R -4 33.887738
## 3 s1 C -4 10.121942
## 4 s2 C -4 7.915386
## 5 s1 R -8 2.827700
## 6 s2 R -8 24.765762
## 7 s1 C -8 17.403577
## 8 s2 C -8 11.906886
aggregate(photo~variety+regime+temp, data=potato, length) #cell size, unequal
## variety regime temp photo
## 1 s1 R -4 5
## 2 s2 R -4 13
## 3 s1 C -4 12
## 4 s2 C -4 7
## 5 s1 R -8 5
## 6 s2 R -8 13
## 7 s1 C -8 13
## 8 s2 C -8 7
#boxplot
par(mfrow=c(1,3))
boxplot(photo ~ variety,
data = potato,
xlab = "Variety",
ylab = "Mean damage score for photosynthesis")
boxplot(photo ~ regime,
data = potato,
xlab = "Regime",
ylab = "Mean damage score for photosynthesis")
boxplot(photo ~ temp,
data = potato,
xlab = "Temp",
ylab = "Mean damage score for photosynthesis")
par(mfrow=c(1,1))
boxplot(photo ~ variety:regime:temp,
data = potato,
xlab = "V:R:T",
ylab = "Mean score of photo")
##different cell means seem to differ in mean damage score for photosynthesis
##View interaction plots, also called profile plots
interaction.plot(regime,variety:temp,photo,type='b',
col=1:4, pch=1:4)
#variety 1 later exposed in -8c seems working better when previously put in room temp than put in cold temp, which is
#different from other combinations
#you can also plot other interaction plots using different x axis.
##fit full model
myfit0<-lm(photo~variety*regime*temp)
anova(myfit0) #type 1
## Analysis of Variance Table
##
## Response: photo
## Df Sum Sq Mean Sq F value Pr(>F)
## variety 1 2044.5 2044.50 25.1340 4.163e-06 ***
## regime 1 1050.4 1050.36 12.9126 0.0006178 ***
## temp 1 23.5 23.47 0.2885 0.5929428
## variety:regime 1 2682.0 2681.98 32.9709 2.477e-07 ***
## variety:temp 1 233.9 233.87 2.8751 0.0946011 .
## regime:temp 1 868.0 868.02 10.6710 0.0017176 **
## variety:regime:temp 1 11.8 11.78 0.1448 0.7047866
## Residuals 67 5450.0 81.34
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(car) #type 2 and 3 SS
## Loading required package: carData
Anova(myfit0,type=3) #wrong way, we need to add restrictions in the model
## Anova Table (Type III tests)
##
## Response: photo
## Sum Sq Df F value Pr(>F)
## (Intercept) 730.9 1 8.9859 0.0038141 **
## variety 1715.6 1 21.0914 1.985e-05 ***
## regime 13.7 1 0.1682 0.6830193
## temp 214.5 1 2.6372 0.1090860
## variety:regime 1145.2 1 14.0785 0.0003686 ***
## variety:temp 0.0 1 0.0004 0.9832785
## regime:temp 488.6 1 6.0064 0.0168707 *
## variety:regime:temp 11.8 1 0.1448 0.7047866
## Residuals 5450.0 67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##CORRECT WAY TO GET THE TYPE III SS
myfit<-lm(photo~variety*regime*temp, contrasts = c(variety=contr.sum,
regime=contr.sum,temp=contr.sum))
## CRITICAL!!! Unbalanced design warning.
## The contrast statement above must be included identifying
## each main effect with "contr.sum" in order for the correct
## Type III SS to be computed.
Anova(myfit,type=3) #use Type 3 for tests
## Anova Table (Type III tests)
##
## Response: photo
## Sum Sq Df F value Pr(>F)
## (Intercept) 14624.3 1 179.7839 < 2.2e-16 ***
## variety 1298.5 1 15.9634 0.0001632 ***
## regime 687.8 1 8.4560 0.0049305 **
## temp 50.6 1 0.6219 0.4331125
## variety:regime 2646.4 1 32.5332 2.881e-07 ***
## variety:temp 9.9 1 0.1219 0.7280542
## regime:temp 879.8 1 10.8155 0.0016060 **
## variety:regime:temp 11.8 1 0.1448 0.7047866
## Residuals 5450.0 67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#residual df = 67 = 75-8
###### lsmeans (least square means)
# unbalanced, means don't match with lsmeans
aggregate(photo~variety+temp, data=potato, mean) #cell means
## variety temp photo
## 1 s1 -4 10.70104
## 2 s2 -4 24.79742
## 3 s1 -8 13.35472
## 4 s2 -8 20.26516
library(lsmeans)
## The 'lsmeans' package is being deprecated.
## Users are encouraged to switch to 'emmeans'.
## See help('transition') for more information, including how
## to convert 'lsmeans' objects and scripts to work with 'emmeans'.
lsmeans(myfit, list(pairwise ~ variety*temp), adjust = "bonferroni")
## NOTE: Results may be misleading due to involvement in interactions
## $`lsmeans of variety, temp`
## variety temp lsmean SE df lower.CL upper.CL
## s1 -4 11.10641 2.400385 67 6.315223 15.89760
## s2 -4 20.90156 2.114104 67 16.681793 25.12133
## s1 -8 10.11564 2.373076 67 5.378959 14.85232
## s2 -8 18.33632 2.114104 67 14.116555 22.55609
##
## Results are averaged over the levels of: regime
## Confidence level used: 0.95
##
## $`pairwise differences of contrast`
## contrast estimate SE df t.ratio p.value
## s1,-4 - s2,-4 -9.7951513 3.198638 67 -3.062 0.0190
## s1,-4 - s1,-8 0.9907724 3.375402 67 0.294 1.0000
## s1,-4 - s2,-8 -7.2299128 3.198638 67 -2.260 0.1623
## s2,-4 - s1,-8 10.7859236 3.178195 67 3.394 0.0070
## s2,-4 - s2,-8 2.5652385 2.989794 67 0.858 1.0000
## s1,-8 - s2,-8 -8.2206852 3.178195 67 -2.587 0.0712
##
## Results are averaged over the levels of: regime
## P value adjustment: bonferroni method for 6 tests
##lsmeans are calculated as the average of the means
#s1+temp -4, mean =11.10641 (mean of all the obsns in this cell), lsmean = 11.10641 = (12.090880 [mean of s1+room+ temp -4]
#+ 10.121942 [mean of s1 + cold + temp-4] )/2
lsmeans(myfit, list(pairwise ~ variety*temp*regime), adjust = "bonferroni")
## $`lsmeans of variety, temp, regime`
## variety temp regime lsmean SE df lower.CL upper.CL
## s1 -4 R 12.090880 4.033453 67 4.040074 20.14169
## s2 -4 R 33.887738 2.501441 67 28.894840 38.88064
## s1 -8 R 2.827700 4.033453 67 -5.223106 10.87851
## s2 -8 R 24.765762 2.501441 67 19.772864 29.75866
## s1 -4 C 10.121942 2.603583 67 4.925169 15.31871
## s2 -4 C 7.915386 3.408890 67 1.111213 14.71956
## s1 -8 C 17.403577 2.501441 67 12.410679 22.39647
## s2 -8 C 11.906886 3.408890 67 5.102713 18.71106
##
## Confidence level used: 0.95
##
## $`pairwise differences of contrast`
## contrast estimate SE df t.ratio p.value
## s1,-4,R - s2,-4,R -21.7968585 4.746151 67 -4.593 0.0006
## s1,-4,R - s1,-8,R 9.2631800 5.704164 67 1.624 1.0000
## s1,-4,R - s2,-8,R -12.6748815 4.746151 67 -2.671 0.2659
## s1,-4,R - s1,-4,C 1.9689383 4.800769 67 0.410 1.0000
## s1,-4,R - s2,-4,C 4.1754943 5.281030 67 0.791 1.0000
## s1,-4,R - s1,-8,C -5.3126969 4.746151 67 -1.119 1.0000
## s1,-4,R - s2,-8,C 0.1839943 5.281030 67 0.035 1.0000
## s2,-4,R - s1,-8,R 31.0600385 4.746151 67 6.544 <.0001
## s2,-4,R - s2,-8,R 9.1219769 3.537572 67 2.579 0.3394
## s2,-4,R - s1,-4,C 23.7657968 3.610520 67 6.582 <.0001
## s2,-4,R - s2,-4,C 25.9723527 4.228208 67 6.143 <.0001
## s2,-4,R - s1,-8,C 16.4841615 3.537572 67 4.660 0.0004
## s2,-4,R - s2,-8,C 21.9808527 4.228208 67 5.199 0.0001
## s1,-8,R - s2,-8,R -21.9380615 4.746151 67 -4.622 0.0005
## s1,-8,R - s1,-4,C -7.2942417 4.800769 67 -1.519 1.0000
## s1,-8,R - s2,-4,C -5.0876857 5.281030 67 -0.963 1.0000
## s1,-8,R - s1,-8,C -14.5758769 4.746151 67 -3.071 0.0862
## s1,-8,R - s2,-8,C -9.0791857 5.281030 67 -1.719 1.0000
## s2,-8,R - s1,-4,C 14.6438199 3.610520 67 4.056 0.0037
## s2,-8,R - s2,-4,C 16.8503758 4.228208 67 3.985 0.0047
## s2,-8,R - s1,-8,C 7.3621846 3.537572 67 2.081 1.0000
## s2,-8,R - s2,-8,C 12.8588758 4.228208 67 3.041 0.0941
## s1,-4,C - s2,-4,C 2.2065560 4.289426 67 0.514 1.0000
## s1,-4,C - s1,-8,C -7.2816353 3.610520 67 -2.017 1.0000
## s1,-4,C - s2,-8,C -1.7849440 4.289426 67 -0.416 1.0000
## s2,-4,C - s1,-8,C -9.4881912 4.228208 67 -2.244 0.7879
## s2,-4,C - s2,-8,C -3.9915000 4.820899 67 -0.828 1.0000
## s1,-8,C - s2,-8,C 5.4966912 4.228208 67 1.300 1.0000
##
## P value adjustment: bonferroni method for 28 tests
##residual analysis
qqPlot(myfit$residuals, las = 1, main="QQ Plot")
## [1] 58 57
shapiro.test(myfit$resid)
##
## Shapiro-Wilk normality test
##
## data: myfit$resid
## W = 0.96858, p-value = 0.05911
plot(myfit$fitted,myfit$res,xlab="Fitted",ylab="Residuals")
#no serious departure detected
#supplemental tests for normality and constant variance
shapiro.test(myfit$resid)
##
## Shapiro-Wilk normality test
##
## data: myfit$resid
## W = 0.96858, p-value = 0.05911
##Brown-Forsythe Test (modified levene test) for constancy of the error variance, reject constant variance assumption
#p-value of the test is 0.0149, less than 0.05
leveneTest(photo~variety*temp*regime, data=potato)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 7 2.7559 0.01409 *
## 67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##check outliers
rstudent<-rstudent(myfit)
bcritical<- qt(1-0.05/(2*n), 75-8-1)
outliers <- which(abs(rstudent) > bcritical)
outliers #58 obsn is detected as outlier
## 58
## 58
#or use outlier test
outlierTest(myfit)
## rstudent unadjusted p-value Bonferonni p
## 58 -4.325043 5.2729e-05 0.0039546
#check what happened to the outlier
potato[58,]
## variety regime temp photo leak
## 58 s2 R -8 -8.5655 10.17
#variety 2 previously conidtioned in room temp and later exposed to low temp as -8c has a
#very low score for for photosynthesis
#try transformation
#box-cox transformation
min(photo) #-8.5655
## [1] -8.5655
photo<-photo+9 #make response positive
myfit2<-lm(photo~variety*regime*temp,contrasts = c(variety=contr.sum,
regime=contr.sum,temp=contr.sum))
library(MASS)
boxcox(myfit2, lambda = seq(-5, 5, length = 10))
#\lambda =1 is within 95% CI, also plots don't show serious departure,
#continue without transformation
##reduced model,
#by type III test
Anova(myfit,type=3)
## Anova Table (Type III tests)
##
## Response: photo
## Sum Sq Df F value Pr(>F)
## (Intercept) 14624.3 1 179.7839 < 2.2e-16 ***
## variety 1298.5 1 15.9634 0.0001632 ***
## regime 687.8 1 8.4560 0.0049305 **
## temp 50.6 1 0.6219 0.4331125
## variety:regime 2646.4 1 32.5332 2.881e-07 ***
## variety:temp 9.9 1 0.1219 0.7280542
## regime:temp 879.8 1 10.8155 0.0016060 **
## variety:regime:temp 11.8 1 0.1448 0.7047866
## Residuals 5450.0 67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##three way interaction is not significant, remove it
myfit3<-lm(photo~variety+regime+temp+variety:temp+variety:regime +temp:regime,
contrasts = c(variety=contr.sum, regime=contr.sum,temp=contr.sum))
##compare full model and reduced model, p-value =0.7048, do not reject reduced model
anova(myfit,myfit3)
## Analysis of Variance Table
##
## Model 1: photo ~ variety * regime * temp
## Model 2: photo ~ variety + regime + temp + variety:temp + variety:regime +
## temp:regime
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 67 5450.0
## 2 68 5461.8 -1 -11.776 0.1448 0.7048
Anova(myfit3,type=3)
## Anova Table (Type III tests)
##
## Response: photo
## Sum Sq Df F value Pr(>F)
## (Intercept) 37235 1 463.5782 < 2.2e-16 ***
## variety 1297 1 16.1476 0.0001491 ***
## regime 687 1 8.5497 0.0046908 **
## temp 40 1 0.4922 0.4853388
## variety:temp 13 1 0.1560 0.6940723
## variety:regime 2649 1 32.9771 2.381e-07 ***
## regime:temp 868 1 10.8070 0.0016024 **
## Residuals 5462 68
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#notice variety:temp is not significant, remove it
myfit4<-lm(photo~variety+regime+temp+variety:regime +temp:regime,
contrasts = c(variety=contr.sum, regime=contr.sum,temp=contr.sum))
Anova(myfit4,type=3)
## Anova Table (Type III tests)
##
## Response: photo
## Sum Sq Df F value Pr(>F)
## (Intercept) 37247 1 469.4657 < 2.2e-16 ***
## variety 1295 1 16.3254 0.0001365 ***
## regime 685 1 8.6393 0.0044719 **
## temp 44 1 0.5527 0.4597261
## variety:regime 2651 1 33.4200 1.969e-07 ***
## regime:temp 1089 1 13.7306 0.0004217 ***
## Residuals 5474 69
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#notice temp is not significant, can we remove it? No.
#because regime:temp is significant, we will keep temp in the model
##residual analysis
qqPlot(myfit4$residuals, las = 1, main="QQ Plot")
## [1] 58 57
plot(myfit4$fitted,myfit4$res,xlab="Fitted",ylab="Residuals")
#no serious departure detected
##check outliers
rstudent<-rstudent(myfit4)
bcritical2<- qt(1-0.05/(2*n), 75-6-1)
outliers <- which(abs(rstudent) > bcritical2)
outliers #58 obsn is detected as outlier
## 58
## 58
#or use outlier test
outlierTest(myfit4)
## rstudent unadjusted p-value Bonferonni p
## 58 -4.342192 4.8086e-05 0.0036065
#fnal model is photo~variety+regime+temp+variety:regime +temp:regime
#Tukey's multiple comparison
comp1<-lsmeans(myfit4, pairwise ~ temp:regime,adjust="tukey")
cld(comp1, alpha=.05,Letters=letters)
## temp regime lsmean SE df lower.CL upper.CL .group
## -4 C 17.79910 2.080579 69 13.64845 21.94974 a
## -8 R 22.81242 2.224896 69 18.37387 27.25097 a
## -8 C 23.89844 2.041053 69 19.82666 27.97023 a
## -4 R 31.97362 2.224896 69 27.53507 36.41217 b
##
## Results are averaged over the levels of: variety
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 4 estimates
## significance level used: alpha = 0.05
comp2<-lsmeans(myfit4, pairwise ~ variety:regime,
adjust="tukey")
cld(comp2,
alpha=.05,
Letters=letters)
## variety regime lsmean SE df lower.CL upper.CL .group
## s1 R 16.45929 2.816704 69 10.84012 22.07846 a
## s2 C 18.91114 2.380550 69 14.16207 23.66020 a
## s1 C 22.78641 1.782355 69 19.23071 26.34211 a
## s2 R 38.32675 1.746846 69 34.84189 41.81161 b
##
## Results are averaged over the levels of: temp
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 4 estimates
## significance level used: alpha = 0.05
#Interpret comparison results, for example, the lsmean damage score of photosynthesis
#for species 2 conditioned at room temperature
#previously is significantly higher (worse adaptability) than other combinations of factor levels of of
#variety and regime.
#also explain where is the interaction coming from, species 1 has better adaptability to the cold weather
#when previously conditioned at room temperature than those conditioned at cold temperature; while species 2
#has better adaptability to the cold weather
#when previously conditioned at cold temperature than those conditioned at room temperature