#########################################Resampling################################################ ###################################jackknife################################################ library(survey) ex.data<-read.table(file="W:/teaching/stat579/data/tuition.txt",header=T) x<-ex.data$restuition y<-ex.data$nonrestuition bhat<-mean(y)/mean(x) ll<-nrow(ex.data) xbarj<-rep(0,ll) ybarj<-rep(0,ll) bhatj<-rep(0,ll) for(i in 1:ll){ xbarj[i]<-mean(x[-i]) ybarj[i]<-mean(y[-i]) bhatj[i]<-ybarj[i]/xbarj[i] vhat<-(ll-1)/ll*sum((bhatj-bhat)^2) } list(xbarj,ybarj,bhatj,vhat) ################easier way################################ ###install package bootstrap########################## library(bootstrap) theta1<-function(x){mean(x)} result1<-jackknife(x,theta1) ##compare to xbarj## result2<-jackknife(y,theta1) ##compare to ybarj## result3<-result2$jack.value/result1$jack.value ##compare to bhatj## ##calculate jackknife variance varjk<-sum((result3-bhat)^2)*9/10 varjk ################ more easier way################################ ##consider the ratio estimation mean(y)/mean(x) as a function ##consider data x, y as column of a matrix xdata<-matrix(c(x,y),ncol=2) n<-nrow(ex.data) theta2<-function(x,xdata){mean(xdata[x,2])/mean(xdata[x,1])} result4<-jackknife(1:n,theta2,xdata) result4 ###################################bootstrap################################################ ##simulated example## #calculating the standard error of the median #creating the data set by taking 100 observations #from a normal distribution with mean 5 and stdev 3 #we have rounded each observation to nearest integer ex.data <- round(rnorm(100,5,3)) ex.data[1:10] #obtaining 20 bootstrap samples resamples <- lapply(1:20, function(i) sample(ex.data, replace = T)) #display the first resample# resamples[1] #calculating the median for each bootstrap sample r.median <- sapply(resamples, median) r.median #calculating the standard deviation of the distribution of medians sqrt(var(r.median)) ##easier way## d<-sample(100,replace = TRUE) samplemedian <- function(x, d) { return(median(x[d])) } #using the boot function boot(ex.data, samplemedian, R=20) ##example 9.8 from Lohr's book## library(SDaA) ##library(SDaA contain all the datasets in Lohr's book) data(htpop) htpop[1:10,] median(htpop$height) ##population median### data(htsrs) ##htsrs is a simple random sample taken from htpop### htsrs[1:10,] n<-nrow(htsrs) ##number of observations in htsrs is 200 n median(htsrs$height) par(mfrow=c(2,1)) hist(htpop$height) hist(htsrs$height) ##find bootstrap variance, ci### w<-sample(200,replace = TRUE) samplemedian <- function(x, c) { return(median(x[c])) } b<-boot(htsrs$height, samplemedian, R=2000) ci<-boot.ci(b, type="basic") ###stratified cluster sample##### library(survey) data(election) ##creat data sorted, with decreasing votes in each county### sorted<-election[order(election$votes, decreasing = TRUE),] ##samples the largest five counties (based in or around ##Los Angeles, Chicago, Houston, Phoenix, and Deroit) for cetainty, then takes ##5 of the next 95, 15 of the next 900, and 15 of the remaining 3600. ##funciton one.sim() does one simulation one.sim<-function(){ insample<-c(1:5, sample(6:100,5), sample(101:1000,15), sample(1001:nrow(sorted),15)) strsample<-sorted[insample,] strsample$strat<-rep(1:4,c(5,5,15,15)) strsample$fpc<-rep(c(5,95,900,nrow(sorted)-1000),c(5,5,15,15)) strdesign<-svydesign(id=~1,strat=~strat,fpc=~fpc,data=strsample) totals<-svytotal(~Bush+Kerry+Nader, strdesign) c(total=coef(totals),se=SE(totals))} one.sim() many.sims<-replicate(100,one.sim()) ##repeats one.sim() R times apply(many.sims[1:3,],1,mean) ##computes the mean of each of the first three rows: the estimated total apply(many.sims[1:3,],1,sd) ## computes the standard deviation of the estimated totals apply(many.sims[4:6,],1,mean) ##computes the mean of the estimated standard error ##hw## library(survey) library(SDaA) data(coots) csize<-coots$csize volume<-coots$volume oweight<-csize/2 ybar<-sum(oweight*volume)/sum(oweight) ##treat the cluster sample as stratified random sample dstr <- svydesign(id = ~1, strata = ~clutch, weight=oweight, data = coots) smean<-svymean(~volume, dstr) smean dcount<-function(){ data<-coots volume<-coots$volume ln<-length(data$clutch) a<-table(data$clutch) rn<-row.names(a) ll<-length(rn) thetahat<-rep(0,ll) for(i in 1:ll){ data<-coots csize<-data$csize oweight<-csize/2 for(j in 1:ln){ if(rn[i]==data$clutch[j]){ oweight[j]<-0 } } for(k in 1:ln){ oweight[k]<-oweight[k]*ll/(ll-1) } thetahat[i]<-sum(oweight*volume)/sum(oweight) vjk<-(ll-1)/ll*sum((thetahat-ybar)^2) } list(thetahat,vjk) }