##example 11.2 in Lohr's book##### ##data is from Macdonell(1901), giving the length of the left middle finger (cm) and height ##(inches) for 3,000 criminals. example 11.2 used an SRS of size 200 from the 3,000 ##criminals. library(SDaA) data(anthsrs) n <- nrow(anthsrs) n anthsrs[1:10,] finger<-anthsrs$finger height<-anthsrs$height ## Fit the estimated regression Line myfit <- lm(height~finger, data=anthsrs) ## Fit the model 'height' = a + b*'finger' summary(myfit) ## 95% confidence intervals for beta_0 and beta_1 confint(myfit, level=.95) ## Assign the estimates to b_0 and b_1 b_0 <- myfit$coef[1] b_1 <- myfit$coef[2] ## Plot the data with the fitted LS line plot(anthsrs, main="Fitted Line Plot") abline(b_0, b_1) ##residual vs yhat resid <- myfit$residuals yhat <- myfit$fitted plot(yhat, resid) abline(0,0,lty=2) ##using population data##### data(anthrop) n <- nrow(anthrop) n anthrop[1:10,] finger2<-anthrop$finger height2<-anthrop$height ## Fit the estimated regression Line myfit2 <- lm(height2~finger2, data=anthrop) ## Fit the model 'height' = a + b*'finger' summary(myfit2) ## 95% confidence intervals for beta_0 and beta_1 confint(myfit2, level=.95) ## Assign the estimates to b_0 and b_1 b_02 <- myfit2$coef[1] b_12 <- myfit2$coef[2] ## Plot the data with the fitted LS line plot(anthrop, main="Fitted Line Plot") abline(b_02, b_12) ##residual vs yhat resid2 <- myfit2$residuals yhat2 <- myfit2$fitted plot(yhat2, resid2) abline(0,0,lty=2) ########using data anthuneq######### ##ignore weights### data(anthuneq) n <- nrow(anthuneq) n anthuneq[1:10,] finger3<-anthuneq$finger height3<-anthuneq$height prob3<-anthuneq$prob ## Fit the estimated regression Line myfit3 <- lm(height3~finger3, data=anthuneq) ## Fit the model 'height' = a + b*'finger' summary(myfit3) ####consider weights#### dsrs<- svydesign(id = ~1, prob=prob3, data = anthuneq) myfit4<-svyglm(height~finger,design=dsrs) #############survey plots################## ##plot srs data data(anthsrs) weightsrs<-rep(1,200) for(i in 1:200){ data<-anthsrs for(j in 1:200){ if(finger[i]==finger[j] & height[i]==height[j] & j!=i){ weightsrs[j]<-weightsrs[j]+1} } } dsrs<- svydesign(id = ~1, weight=weightsrs,data = anthsrs) svyplot(height~finger, design=dsrs, xlab="finger", ylab="height",style="bubble") ##plot population data data(anthrop) x<-anthrop$finger y<-anthrop$height weightpop<-rep(1,3000) for(i in 1:3000){ data<-anthrop for(j in 1:3000){ if(x[i]==x[j] & y[i]==y[j] & j!=i){ weightpop[j]<-weightpop[j]+1} } } dpop<- svydesign(id = ~1, weight=weightpop,data = anthrop) svyplot(height~finger, design=dpop, xlab="finger", ylab="height",style="bubble") ##plot the unequal probability data data(anthuneq) finger3<-anthuneq$finger height3<-anthuneq$height prob3<-anthuneq$prob weightuneq<-1/prob3 for(i in 1:200){ data<-anthuneq for(j in 1:200){ if(finger3[i]==finger3[j] & height3[i]==height3[j] & j!=i){ weightuneq[j]<-weightuneq[j]+1} } } duneq<- svydesign(id = ~1, weight=weightuneq, data = anthuneq) svyplot(height~finger, design=duneq, xlab="finger", ylab="height",style="bubble") #########design based v.s model based####### library(SDaA) data(anthsrs) n <- nrow(anthsrs) n anthsrs[1:10,] finger<-anthsrs$finger height<-anthsrs$height ## model based myfit <- lm(height~finger, data=anthsrs) ## Fit the model 'height' = a + b*'finger' summary(myfit) ##designed based dsrs<- svydesign(id = ~1, data = anthsrs) myfit5<-svyglm(height~finger,design=dsrs) ###unequal probability## data(anthuneq) n <- nrow(anthuneq) n anthuneq[1:10,] finger3<-anthuneq$finger height3<-anthuneq$height prob3<-anthuneq$prob ## model based par(mfrow=c(2,1)) myfit3 <- lm(height3~finger3, data=anthuneq) ## Fit the model 'height' = a + b*'finger' summary(myfit3) b_03 <- myfit3$coef[1] b_13 <- myfit3$coef[2] plot(height3~finger3, data=anthuneq, main="Fitted Line Plot") abline(b_03, b_13) ##design based duneq<- svydesign(id = ~1, weight=weightuneq, data = anthuneq) myfit4<-svyglm(height~finger,design=duneq) summary(myfit4) b_04 <- myfit4$coef[1] b_14 <- myfit4$coef[2] svyplot(height~finger, design=duneq, main="Fitted Line Plot") abline(b_04, b_14) #####################multiple regression#################### ##The Academic Performance Index is computed for all California schools based on standardised testing of students. ##The data sets contain information for all schools with at least 100 students and for various ##probability samples of the data. ##stype Elementary/Middle/High School ##api00 API in 2000 ##meals: Percentage of students eligible for subsidized meals ##ell:¡®English Language Learners¡¯ (percent) ##:mobility: percentage of students for whom this is the first year at the school data(api) dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc) myfit<-svyglm(api00~ell+meals+mobility, design=dstrat) summary(myfit)