######################################## ######################################## ######## Polynomial Regression ######### ######################################## ## Generate the Data set.seed(10) n <- 40 x <- runif(n, 0, 10) beta_0 <- 10 beta_1 <- -3 beta_2 <- .5 eps <- rnorm(n, 0, 3) y <- beta_0 + beta_1*x + beta_2*x^2 + eps ################################ plot(x, y) ## Fit quadratic regression model xsq <- x^2 myfit1 <- lm(y ~ x + xsq) summary(myfit1) ## Fitted Curve plot plot(x, y) lines(x[order(x)], myfit1$fitted[order(x)]) ## Test whether cubic term is needed xcub <- x^3 myfit2 <- lm(y ~ x+ xsq + xcub) summary(myfit2) ########################################################## #################Example Hooker Data###################### ########################################################## hook<- read.table(file="C:/jenn/teaching/stat440540/hook.dat",sep="",col.names=c("Temp","Pres")) attach(hook) hook summary(hook) hk<-lm(Pres~Temp) summary(hk) anova(hk) #centering the data xbar<-mean(Temp) h1<-(Temp-xbar) h2<-(Temp-xbar)^2 h3<-(Temp-xbar)^3 h4<-(Temp-xbar)^4 h5<-(Temp-xbar)^5 #fit a quintic polynomial hkc<-lm(Pres~h1+h2+h3+h4+h5) summary(hkc) #fit a quintic polynomial without centering x1<-Temp x2<-Temp^2 x3<-Temp^3 x4<-Temp^4 x5<-Temp^5 hk5<-lm(Pres~x1+x2+x3+x4+x5) summary(hk5) anova(hk5) #fit a quadratic polynomial hkq<-lm(Pres~x1+x2) summary(hkq) hkq<-lm(Pres~x1+x2) summary(hkq) plot(Temp, Pres) lines(Temp[order(Temp)], hkq$fitted[order(Temp)]) abline(hk,col=4) resid<-hkq$residual plot(hkq$fitted,hkq$residual) qqnorm(resid) qqline(resid) newdata <- data.frame(x1=205,x2=205^2) predict(hkq, newdata, interval="confidence", level=.95) ############################################### ################################################ #####Interaction, bodyfat example###### ############################################### bodyfat<-read.table(file="W:/teaching/stat440540/data/CH7/CH07TA01.txt") x1 <- bodyfat$V1 x2 <- bodyfat$V2 x3 <- bodyfat$V3 y <- bodyfat$V4 x1x2<-x1*x2 x1x3<-x1*x3 x2x3<-x2*x3 myfit<-lm(y ~ x1 + x2 + x3+ x1x2 + x1x3 + x2x3) resid <- myfit$residuals myfit1<-lm(y ~ x1 + x2 + x3) resid1 <- myfit1$residuals summary(myfit) ##correlation matrix cor(cbind(x1,x2,x3,x1x2,x1x3,x2x3)) pairs(x1~x2+x3+x1x2+x1x3+x2x3) #### Check for possible interactions par(mfrow=c(3,2)) plot(x1*x2, resid1) abline(0,0,lty=2) plot(x1*x3, resid1) abline(0,0,lty=2) plot(x2*x3, resid1) abline(0,0,lty=2) plot(x1*x2, x1) abline(0,0,lty=2) plot(x1*x3, x3) abline(0,0,lty=2) plot(x2*x3, x2) abline(0,0,lty=2) ##centering x1c<-x1-mean(x1) x2c<-x2-mean(x2) x3c<-x3-mean(x3) x1cx2c<-x1c*x2c x1cx3c<-x1c*x3c x2cx3c<-x2c*x3c myfit2<-lm(y ~ x1c + x2c + x3c+ x1cx2c + x1cx3c + x2cx3c) summary(myfit2) anova(myfit2) ########################Insurance innovation example############################ ex.data<-read.table(file="W:/teaching/stat440540/Fall2013/data/insurance.txt",col.names=c("y","size","type")) n<-nrow(ex.data) attach(ex.data) myfit1<-lm(y~size+type) #as a default, mutual is treated as 0 and stock is treated as 1 myfit1 summary(myfit1) #code "type" using indicator variable, x2=0, mutual, 1 for stock for(i in 1:n){ if (ex.data$type[i]=='mutual') {ex.data$x2[i]<-0} if (ex.data$type[i]=='stock') {ex.data$x2[i]<-1} } myfit2<-lm(y~size+x2,data=ex.data) summary(myfit2) #fit with interaction terms myfit3<-lm(y~size+type+size*type) myfit4<-lm(y~size+x2+size*x2,data=ex.data) summary(myfit4) anova(myfit4) #test for \beta_2=\beta_3=0, or compare model with size and model with interaction myfit0<-lm(y~size) anova(myfit0,myfit4) ###change the categorical variable to three categories ex.data2<-read.table(file="W:/teaching/stat440540/Fall2013/data/threecate.txt") y<-ex.data2$V1 x1<-ex.data2$V2 x2<-ex.data2$V3 f.x2<-factor(x2) f.x2 myfit5<-lm(y~x1+x2) #incorrect analysis considering x2 is categorical variable myfit5 summary(myfit5) myfit6<-lm(y~x1+f.x2) #correct analysis myfit6 summary(myfit6)