########################################################## ############### Handout #6 for ST440/540 ############### ######### Collinearity ########### ########################################################## ################################################# ################## Two extreme cases #################### ##################################### ################################################# ########## extreme case when the predictor variables are uncorrelated ####### ex.data1<-read.table(file="W:/teaching/stat440540/data/CH7/CH07TA06.txt") x1 <- ex.data1$V1 x2 <- ex.data1$V2 y <- ex.data1$V3 plot(x1,x2) cor(cbind(y, x1, x2)) ##notice that x1 and x2 are uncorrelated ## Fit the model y = b0 + b1*x1 + b2*x2 myfit1 <- lm(y ~ x1 + x2, data=ex.data1) summary(myfit1) anova(myfit1) ## Fit the model y = b0 + b1*x2 + b2*x1 myfit1_1 <- lm(y ~ x2 + x1, data=ex.data1) summary(myfit1_1) anova(myfit1_1) ## Fit the model y = b0 + b1*x1 myfit2 <- lm(y ~ x1, data=ex.data1) summary(myfit2) anova(myfit2) ## Fit the model y = b0 + b1*x2 myfit3 <- lm(y ~ x2, data=ex.data1) summary(myfit3) anova(myfit3) ########## extreme case when the predictor variables are perfectly correlated ####### ex.data2<-read.table(file="W:/teaching/stat440540/data/CH7/CH07TA08.txt") x1 <- ex.data2$V1 x2 <- ex.data2$V2 y <- ex.data2$V3 ex.data2 plot(x1,x2) ##notice that x1 and x2 are perfectly correlated by x2= 5+.5 x1 ## Fit the model y = b0 + b1*x1 + b2*x2 myfit4 <- lm(y ~ x1 + x2, data=ex.data2) summary(myfit4) anova(myfit4) ################################################# ################## Example #################### ########## Collinearity in bodyfat data ########### ################################################# ## Read in data bodyfat<-read.table(file="W:/teaching/stat440540/data/CH7/CH07TA01.txt") #################################################### x1 <- bodyfat$V1 x2 <- bodyfat$V2 x3 <- bodyfat$V3 y <- bodyfat$V4 cor(cbind(x1,x2,x3)) pairs(x1~x2+x3) ## Fit the Estimated Regression Line by different predictor variables myfit1 <- lm(y ~ x1,data=bodyfat) summary(myfit1) anova(myfit1) myfit2 <- lm(y ~ x2,data=bodyfat) resid2<-myfit2$residual summary(myfit2) anova(myfit2) myfit3 <- lm(y ~ x1+x2,data=bodyfat) summary(myfit3) anova(myfit3) #try type II anova table library(car) Anova(myfit3,type="II") myfit4 <- lm(y ~ x1+x2+x3,data=bodyfat) summary(myfit4) anova(myfit4) myfit5<-lm(x1~x2) summary(myfit5) resid5<-myfit5$residual ##calculate R^2_Y 1|2 myfit6<-lm(resid5~resid2) summary(myfit6) ##calculate fitted value newdata <- data.frame(x1=25) predict(myfit1, newdata=newdata, interval="confidence", level=.95) newdata <- data.frame(x2=50) predict(myfit2, newdata=newdata, interval="confidence", level=.95) newdata <- data.frame(x1=25,x2=50) predict(myfit3, newdata=newdata, interval="confidence", level=.95) newdata <- data.frame(x1=25,x2=50,x3=29) predict(myfit4, newdata=newdata, interval="confidence", level=.95)