########################################################## ################ Logistic Regression##################### ########################################################## #################Example Chapman Data###################### chapman<- read.table(file="W:/teaching/stat434534/datasets/CHAPMAN.DAT",header=T) n<-nrow(chapman) attach(chapman) head(chapman) cor(chapman) #fit data with all the variables myfit<-glm(Cnt~Ag+S+D+Ch+H+W, family=binomial("logit")) summary(myfit) anova(myfit) confint.default(myfit) ## CIs using standard errors myfit$deviance #G^2 myfit$fit #fitted probabilities plot(myfit$fit ~ Ag, data=chapman) lines(Ag, myfit$fit, type="l", col="red") plot(myfit$fit ~ Ch, data=chapman) #predict fitted log odds when Ag=60,S=140,D=90,Ch=200,H=69,W=200 newdata <- data.frame(Ag=60,S=140,D=90,Ch=200,H=69,W=200) predict(myfit, newdata) newdata <- data.frame(Ag=44,S=124,D=80,Ch=254,H=70,W=190) #first obsn in the data predict(myfit, newdata) a<-myfit$coef[1]+myfit$coef[2]*44+myfit$coef[3]*124+myfit$coef[4]*80+ myfit$coef[5]*254+myfit$coef[6]*70+myfit$coef[7]*190 e<-exp(1) phat<-(e^a)/(1+e^a) phat #model comparisons fitAg<-glm(Cnt~Ag, family=binomial) summary(fitAg) fitW<-glm(Cnt~W, family=binomial) summary(fitW) fitHW<-glm(Cnt~H+W, family=binomial) summary(fitHW) fitCh<-glm(Cnt~Ch, family=binomial) summary(fitCh) fitSD<-glm(Cnt~S+D, family=binomial) summary(fitSD) fitintercept<-glm(Cnt~1, family=binomial) summary(fitintercept) anova(fitAg,myfit) anova(fitW, myfit) qchisq(0.95,5) #model selection upper<-formula(~Ag+S+D+Ch+H+W) model.aic = step(fitintercept, scope=list(lower= ~., upper= upper)) #recommend model Cnt ~ Ag + W + Ch #fit data with the selected variables Ag, W, Ch myfit2<-glm(Cnt~Ag+ W+ Ch, family=binomial) smyfit2<-summary(myfit2) #diagnostics infv <- c(Cnt,myfit2$fit,hatvalues(myfit2),rstandard(myfit2),cooks.distance(myfit2)) inf<-matrix(infv,I(smyfit2$df[1]+smyfit2$df[2]),5,dimnames = list(NULL, c("y", "yhat", "lev","r","C"))) inf #leverages plot(hatvalues(myfit2)) highleverage <- which(hatvalues(myfit2) > .06) highleverage hatvalues(myfit2)[highleverage] chapman[41,] #cooks distance plot(cooks.distance(myfit2)) highcook <- which((cooks.distance(myfit2)) > .05) cooks.distance(myfit2)[highcook] #delete case 41, refit data chapman2<-chapman[-41,] myfit3<-glm(Cnt~Ag+ W+ Ch, family=binomial,data=chapman2) summary(myfit3) myfit4<-glm(Cnt~Ag+ W, family=binomial,data=chapman2) summary(myfit4)