###############################################################
##################  Handout #5 for ST476/576  #################
#### Comparing Means from several MV populations (MANOVA) #####
###############################################################


#################################################
################## Example 1 ####################
#### Test of hyp, and CI's on Generated Data ####
#################################################

#### Generate random samples of n1=50, n2=50, n3=50 from 3 populations
#### N(mu_1, Sigma), N(mu_2, Sigma), N(mu_3, Sigma) 

library("MASS")
library(mvnormtest)
set.seed(11)
mu1 <- c(0,0,0)
mu2 <- c(0,1,3)
mu3 <- c(0,0,0)
Sigma <- rbind(c(5,  4,  5),
               c(4, 10,  3),
               c(5,  3, 15))
n1 <- 50
n2 <- 50
n3 <- 50

X1 <- mvrnorm(n1, mu1, Sigma)
X2 <- mvrnorm(n1, mu2, Sigma)
X3 <- mvrnorm(n1, mu3, Sigma)


## Column stack the data and add an indicator variable to denote the group
X <- rbind(X1, X2, X3)
group <- factor(c(rep(1,n1), rep(2,n2), rep(3,n3)))


#### MANOVA procedure to test H_0: mu1=m2=mu3 ####

manova.obj <- manova(X ~ group)
summary(manova.obj, test="Wilks")


## Give SS matrices
B <- summary(manova.obj)$SS[[1]]
W <- summary(manova.obj)$SS[[2]]


## Perform approximate Chi Sqr Test by direct calculation 
## (just to verify what R is doing)
N <- nrow(X)
p <- ncol(X)
g <- length(unique(group))
Lambda <- det(W)/det(B+W)
a_star <- -(N-1-(p+g)/2)*log(Lambda)
p_val <- 1-pchisq(a_star, p*(g-1))
p_val

## Other possiblilities for a test statistic
summary(manova.obj, test="Pillai")
summary(manova.obj, test="Hotelling-Lawley")
summary(manova.obj, test="Roy")


#### Obtain a univariate ANOVA summary for each component ####

summary.aov(manova.obj)


#### 95% Simulataneous CI's for all pairwise diffs between component means ####


## To make simulataneous CI's just need to make sure the treatment variable 
## is called factor, the multivariate response matrix is called X, the 
## SS_res matrix is called W, and alpha is set to desired value.

factor <- group
X <- X
W <- summary(manova.obj)$SS[[2]]
alpha <- .05


## Now the following commands will calculate the CI's for you

N <- nrow(X)
p <- ncol(X)
factor <- as.numeric(factor)
levels <- unique(factor)
g <- length(levels)
conf <- 1-alpha/(p*g*(g-1))
x_bar_mat <- matrix(nrow=g,ncol=p)
n <- numeric(g)
CIs <- data.frame(matrix(nrow=p*choose(g,2), ncol=2))
names(CIs) <- c(paste(" ", 100*(1-alpha), "% Lower", sep=""), paste(" ", 100*(1-alpha), "% Upper", sep=""))

for(l in 1:g){
  x_bar_mat[l,] <- colMeans(X[factor==l,])
  n[l] <- sum(factor==l)
}

next.row <- 1
for(i in 1:p){
  for(l in 1:(g-1)){
    for(m in (l+1):g){
      lower <- (x_bar_mat[l,i]-x_bar_mat[m,i]) - qt(conf, N-g)*sqrt(W[i,i]/(N-g)*(1/n[l]+1/n[m]))
      upper <- (x_bar_mat[l,i]-x_bar_mat[m,i]) + qt(conf, N-g)*sqrt(W[i,i]/(N-g)*(1/n[l]+1/n[m]))
      descr <- paste("tau_",l, ",", i, " - ", "tau_",m, ",", i, sep="")
      row.names(CIs)[next.row] <- descr
      CIs[next.row,] <- c(format(round(lower,4),nsmall=4), format(round(upper,4),nsmall=4))
      next.row <- next.row+1
    }
  }
}

## Print the CI's
CIs


#### Create Main Effects Plots ####

## technically only the plot for the third component should be examined 
## based on the CI's

par(mfrow=c(3,1))
interaction.plot(group, rep(1,length(group)), X[,1], legend=F, main="Main Effects Plot for Group", ylab="1st Component Means")

interaction.plot(group, rep(1,length(group)), X[,2], legend=F, main="Main Effects Plot for Group", ylab="2nd Component Means")

interaction.plot(group, rep(1,length(group)), X[,3], legend=F, main="Main Effects Plot for Group", ylab="3rd Component Means")
par(mfrow=c(1,1))


## Can also use boxplots

par(mfrow=c(3,1))

plot(group, X[,1])
plot(group, X[,2])
plot(group, X[,3])  

par(mfrow=c(1,1))


###########################################################
####################### Example 2 #########################
### Test of MANOVA hyp, and pairwise CI's on Skull Data ###
###########################################################

#### Read in data ####
ex.data <- read.table(file="skull_data.txt", header=T)


#### MANOVA for Size = Period ####
Size <- as.matrix(ex.data[,1:4])
Period <- factor(ex.data[,5])

manova.obj <- manova(Size ~ Period)
summary(manova.obj,test="Wilks")


#### Check MANOVA assumptions ####

## (1) Assess marginal Normality of each components residuals
##    via histograms and QQ-plots 

resid <- manova.obj$residuals

par(mfrow=c(4,2))  

hist(Size[,1], main=names(ex.data)[1], xlab='X_1')  
qqnorm(resid[,1])  
qqline(resid[,1])  
shapiro.test(resid[,1])

hist(Size[,2], main=names(ex.data)[2], xlab='X_2')  
qqnorm(resid[,2])  
qqline(resid[,2])  
shapiro.test(resid[,2])


hist(Size[,3], main=names(ex.data)[3], xlab='X_3')  
qqnorm(resid[,3])  
qqline(resid[,3])  
shapiro.test(resid[,2])


hist(Size[,4], main=names(ex.data)[4], xlab='X_4')  
qqnorm(resid[,4])  
qqline(resid[,4]) 
shapiro.test(resid[,2])
 

par(mfrow=c(1,1))


## (2) Assess bivariate Normaility of each pair
pairs(resid)
pairs12<-Size[,1:2]
mshapiro.test(t(pairs12))
##similarly you can do the mshapiro test for other pairs


## Compare Covariance matrices for each group
S1 <- cov(Size[Period==1,])
S2 <- cov(Size[Period==2,])
S3 <- cov(Size[Period==3,])
S1
S2
S3

#testing equality of covariance matrices
BoxMTest <- function(X, cl, alpha=0.05) {
  ## Multivariate Statistical Testing for the Homogeneity of Covariance
  ## Matrices by the Box's M.
  #### Inputs:
  ## X - data matrix (Size of matrix must be n-by-p).
  ## alpha - significance level (default = 0.05).
  ## cl - factor, nlevel(cl) = g
  ## Output:
  ## MBox - the Box's M statistic.
  ## Chi-sqr. or F - the approximation statistic test.
  ## df's - degrees' of freedom of the approximation statistic test.
  ## P - observed significance level.
  ##
  ## If the groups sample-size is at least 20 (sufficiently large),
  ## Box's M test takes a Chi-square approximation; otherwise it takes
  ## an F approximation.
  ##
 
  ## Created by A. Trujillo-Ortiz and R. Hernandez-Walls
  ## Facultad de Ciencias Marinas
  ## Universidad Autonoma de Baja California
  ## Apdo. Postal 453
  ## Ensenada, Baja California
  ## Mexico.
  
  
  if (alpha <= 0 || alpha >= 1)
    stop('significance level must be between 0 and 1')

  g = nlevels(cl) ## Number of groups.
  n = table(cl) ## Vector of groups-size.

  N = nrow(X)
  p = ncol(X)

  bandera = 2
  if (any(n >= 20)) bandera = 1

  ## Partition of the group covariance matrices.
  covList <- tapply(X, rep(cl, ncol(X)), function(x, nc) cov(matrix(x,
nc=nc)),
                    ncol(X))

  deno = sum(n) - g
  suma = array(0, dim=dim(covList[[1]]))

  for (k in 1:g)
    suma = suma + (n[k] - 1) * covList[[k]]

  Sp = suma / deno ## Pooled covariance matrix.
  Falta=0

  for (k in 1:g)
    Falta = Falta + ((n[k] - 1) * log(det(covList[[k]])))
  
  MB = (sum(n) - g) * log(det(Sp)) - Falta ## Box's M statistic.
  suma1 = sum(1 / (n[1:g] - 1))
  suma2 = sum(1 / ((n[1:g] - 1)^2))
  C = (((2 * p^2) + (3 * p) - 1) / (6 * (p + 1) * (g - 1))) *
    (suma1 - (1 / deno)) ## Computing of correction factor.
  if (bandera == 1) {
    X2 = MB * (1 - C) ## Chi-square approximation.
    v = as.integer((p * (p + 1) * (g - 1)) / 2) ## Degrees of freedom.
    ## Significance value associated to the observed Chi-square statistic.
    P = pchisq(X2, v, lower=TRUE)

    cat('------------------------------------------------\n');
    cat(' MBox Chi-sqr. df P\n')
    cat('------------------------------------------------\n')
    cat(sprintf("%10.4f%11.4f%12.i%13.4f\n", MB, X2, v, P))
    cat('------------------------------------------------\n')
    if (P >= alpha) {
      cat('Covariance matrices are not significantly different.\n')
    } else {
      cat('Covariance matrices are significantly different.\n')
    }
    return(list(MBox=MB, ChiSq=X2, df=v, pValue=P))
  } else {
    ## To obtain the F approximation we first define Co, which combined to
    ## the before C value are used to estimate the denominator degrees of
    ## freedom (v2); resulting two possible cases.
    Co = (((p-1) * (p+2)) / (6 * (g-1))) * (suma2 - (1 / (deno^2)))
    if (Co - (C^2) >= 0) {
      v1 = as.integer((p * (p + 1) * (g - 1)) / 2) ## Numerator DF.
      v21 = as.integer(trunc((v1 + 2) / (Co - (C^2)))) ## Denominator DF.
      F1 = MB * ((1 - C - (v1 / v21)) / v1) ## F approximation.
      ## Significance value associated to the observed F statistic.
      P1 = pf(F1, v1, v21, lower=FALSE)

 
cat('\n------------------------------------------------------------\n')
      cat(' MBox F df1 df2 P\n')
      cat('------------------------------------------------------------\n')
      cat(sprintf("%10.4f%11.4f%11.i%14.i%13.4f\n", MB, F1, v1, v21, P1))
      cat('------------------------------------------------------------\n')
      if (P1 >= alpha) {
        cat('Covariance matrices are not significantly different.\n')
      } else {
        cat('Covariance matrices are significantly different.\n')
      }
      return(list(MBox=MB, F=F1, df1=v1, df2=v21, pValue=P1))
    } else {
      v1 = as.integer((p * (p + 1) * (g - 1)) / 2) ## Numerator df.
      v22 = as.integer(trunc((v1 + 2) / ((C^2) - Co))) ## Denominator df.
      b = v22 / (1 - C - (2 / v22))
      F2 = (v22 * MB) / (v1 * (b - MB)) ## F approximation.
      ## Significance value associated to the observed F statistic.
      P2 = pf(F2, v1, v22, lower=FALSE)
      
 
cat('\n------------------------------------------------------------\n')
      cat(' MBox F df1 df2 P\n')
      cat('------------------------------------------------------------\n')
      cat(sprintf('%10.4f%11.4f%11.i%14.i%13.4f\n', MB, F2, v1, v22, P2))
      cat('------------------------------------------------------------\n')
      
      if (P2 >= alpha) {
        cat('Covariance matrices are not significantly different.\n')
      } else {
        cat('Covariance matrices are significantly different.\n')
      }
      return(list(MBox=MB, F=F2, df1=v1, df2=v22, pValue=P2))
    }
  }
} 

BoxMTest(Size,Period)

#### 95% Simulataneous CI's for all pairwise diffs between component means ####

## To make simulataneous CI's just need to make sure the treatment variable 
 ## is called factor, the multivariate response matrix is called X, the 
 ## SS_res matrix is called W, and alpha is set to desired value.

factor <- Period
X <- Size
W <- summary(manova.obj)$SS[[2]]
alpha <- .05

## Now the following commands will calculate the CI's for you

N <- nrow(X)
p <- ncol(X)
factor <- as.numeric(factor)
levels <- unique(factor)
g <- length(levels)
conf <- 1-alpha/(p*g*(g-1))
x_bar_mat <- matrix(nrow=g,ncol=p)
n <- numeric(g)
CIs <- data.frame(matrix(nrow=p*choose(g,2), ncol=2))
names(CIs) <- c(paste(" ", 100*(1-alpha), "% Lower", sep=""), paste(" ", 100*(1-alpha), "% Upper", sep=""))

for(l in 1:g){
  x_bar_mat[l,] <- colMeans(X[factor==l,])
  n[l] <- sum(factor==l)
}

next.row <- 1
for(i in 1:p){
  for(l in 1:(g-1)){
    for(m in (l+1):g){
      lower <- (x_bar_mat[l,i]-x_bar_mat[m,i]) - qt(conf, N-g)*sqrt(W[i,i]/(N-g)*(1/n[l]+1/n[m]))
      upper <- (x_bar_mat[l,i]-x_bar_mat[m,i]) + qt(conf, N-g)*sqrt(W[i,i]/(N-g)*(1/n[l]+1/n[m]))
      descr <- paste("tau_",l, ",", i, " - ", "tau_",m, ",", i, sep="")
      row.names(CIs)[next.row] <- descr
      CIs[next.row,] <- c(format(round(lower,4),nsmall=4), format(round(upper,4),nsmall=4))
      next.row <- next.row+1
    }
  }
}

## Print the CI's
CIs


#################################################
################## Example 3 ####################
### Test of MANOVA hyp on Milk Transport Data ###
#################################################


#### Read in data ####
ex.data <- read.table(file="handout4_data.txt", header=T)


#### MANOVA for Costs = Fuel type ####
Cost <- as.matrix(ex.data[,1:3])
Fuel <- ex.data$type

manova.obj <- manova(Cost ~ Fuel)
summary(manova.obj, test="Wilks")


#### Obtain a univariate ANOVA summary for each component ####

summary.aov(manova.obj)


#### 95% Simulataneous CI's for all pairwise diffs between component means ####

## To make simulataneous CI's just need to make sure the treatment variable 
 ## is called factor, the multivariate response matrix is called X, the 
 ## SS_res matrix is called W, and alpha is set to desired value.

factor <- Fuel
X <- Cost
W <- summary(manova.obj)$SS[[2]]
alpha <- .05

## Now the following commands will calculate the CI's for you

N <- nrow(X)
p <- ncol(X)
factor <- as.numeric(factor)
levels <- unique(factor)
g <- length(levels)
conf <- 1-alpha/(p*g*(g-1))
x_bar_mat <- matrix(nrow=g,ncol=p)
n <- numeric(g)
CIs <- data.frame(matrix(nrow=p*choose(g,2), ncol=2))
names(CIs) <- c(paste(" ", 100*(1-alpha), "% Lower", sep=""), paste(" ", 100*(1-alpha), "% Upper", sep=""))

for(l in 1:g){
  x_bar_mat[l,] <- colMeans(X[factor==l,])
  n[l] <- sum(factor==l)
}

next.row <- 1
for(i in 1:p){
  for(l in 1:(g-1)){
    for(m in (l+1):g){
      lower <- (x_bar_mat[l,i]-x_bar_mat[m,i]) - qt(conf, N-g)*sqrt(W[i,i]/(N-g)*(1/n[l]+1/n[m]))
      upper <- (x_bar_mat[l,i]-x_bar_mat[m,i]) + qt(conf, N-g)*sqrt(W[i,i]/(N-g)*(1/n[l]+1/n[m]))
      descr <- paste("tau_",l, ",", i, " - ", "tau_",m, ",", i, sep="")
      row.names(CIs)[next.row] <- descr
      CIs[next.row,] <- c(format(round(lower,4),nsmall=4), format(round(upper,4),nsmall=4))
      next.row <- next.row+1
    }
  }
}

## Print the CI's
CIs


#### Create Main Effects Plots ####

## only the plot for the third component should be examined based on the CI's

interaction.plot(Fuel, rep(1,length(Fuel)), X[,3], legend=F, main="Main Effects Plot for Fuel", ylab="Capital Means")


## Can also use boxplots

plot(Fuel, X[,3], main="Boxplots for Capital Cost")