#########################################Resampling################################################

###################################jackknife################################################

library(survey)
ex.data<-read.table(file="W:/teaching/stat579/data/tuition.txt",header=T)
x<-ex.data$restuition
y<-ex.data$nonrestuition
bhat<-mean(y)/mean(x)
ll<-nrow(ex.data)
xbarj<-rep(0,ll)
ybarj<-rep(0,ll)
bhatj<-rep(0,ll)
for(i in 1:ll){ 
xbarj[i]<-mean(x[-i])
ybarj[i]<-mean(y[-i])
bhatj[i]<-ybarj[i]/xbarj[i]
vhat<-(ll-1)/ll*sum((bhatj-bhat)^2)
}
list(xbarj,ybarj,bhatj,vhat)

################easier way################################
###install package bootstrap##########################
library(bootstrap)
theta1<-function(x){mean(x)}
result1<-jackknife(x,theta1)  ##compare to xbarj##
result2<-jackknife(y,theta1)  ##compare to ybarj##
result3<-result2$jack.value/result1$jack.value ##compare to bhatj##
##calculate jackknife variance
varjk<-sum((result3-bhat)^2)*9/10
varjk

################ more easier way################################
##consider the ratio estimation mean(y)/mean(x) as a function
##consider data x, y as column of a matrix
xdata<-matrix(c(x,y),ncol=2)
n<-nrow(ex.data)
theta2<-function(x,xdata){mean(xdata[x,2])/mean(xdata[x,1])}
result4<-jackknife(1:n,theta2,xdata)
result4


###################################bootstrap################################################

##simulated example##
#calculating the standard error of the median
#creating the data set by taking 100 observations 
#from a normal distribution with mean 5 and stdev 3
#we have rounded each observation to nearest integer
ex.data <- round(rnorm(100,5,3))
ex.data[1:10]
#obtaining 20 bootstrap samples 
resamples <- lapply(1:20, function(i) sample(ex.data, replace = T))
#display the first resample#
resamples[1]
#calculating the median for each bootstrap sample 
r.median <- sapply(resamples, median)
r.median
#calculating the standard deviation of the distribution of medians
sqrt(var(r.median))

##easier way##
d<-sample(100,replace = TRUE)
samplemedian <- function(x, d) {
  return(median(x[d]))
}
#using the boot function
boot(ex.data, samplemedian, R=20)

##example 9.8 from Lohr's book##
library(SDaA) ##library(SDaA contain all the datasets in Lohr's book)
data(htpop)
htpop[1:10,]
median(htpop$height)  ##population median###
data(htsrs)  ##htsrs is a simple random sample taken from htpop###
htsrs[1:10,]  
n<-nrow(htsrs)   ##number of observations in htsrs is 200
n
median(htsrs$height)
par(mfrow=c(2,1))
hist(htpop$height)
hist(htsrs$height)
##find bootstrap variance, ci###
w<-sample(200,replace = TRUE)
samplemedian <- function(x, c) {
  return(median(x[c]))
}
b<-boot(htsrs$height, samplemedian, R=2000)
ci<-boot.ci(b, type="basic")


###stratified cluster sample#####
library(survey)
data(election) 
##creat data sorted, with decreasing votes in each county###
sorted<-election[order(election$votes, decreasing = TRUE),]
##samples the largest five counties (based in or around 
##Los Angeles, Chicago, Houston, Phoenix, and Deroit) for cetainty, then takes
##5 of the next 95, 15 of the next 900, and 15 of the remaining 3600.
##funciton one.sim() does one simulation  
one.sim<-function(){
insample<-c(1:5,
sample(6:100,5),
sample(101:1000,15),
sample(1001:nrow(sorted),15))
strsample<-sorted[insample,]
strsample$strat<-rep(1:4,c(5,5,15,15))
strsample$fpc<-rep(c(5,95,900,nrow(sorted)-1000),c(5,5,15,15))
strdesign<-svydesign(id=~1,strat=~strat,fpc=~fpc,data=strsample)
totals<-svytotal(~Bush+Kerry+Nader, strdesign)
c(total=coef(totals),se=SE(totals))}
one.sim()


many.sims<-replicate(100,one.sim()) ##repeats one.sim() R times
apply(many.sims[1:3,],1,mean) ##computes the mean of each of the first three rows: the estimated total
apply(many.sims[1:3,],1,sd) ## computes the standard deviation of the estimated totals
apply(many.sims[4:6,],1,mean) ##computes the mean of the estimated standard error


##hw##
library(survey)
library(SDaA)
data(coots)
csize<-coots$csize
volume<-coots$volume
oweight<-csize/2
ybar<-sum(oweight*volume)/sum(oweight)

##treat the cluster sample as stratified random sample
dstr <- svydesign(id = ~1, strata = ~clutch, weight=oweight, data = coots)
smean<-svymean(~volume, dstr)
smean

dcount<-function(){
     
     data<-coots
     volume<-coots$volume
     ln<-length(data$clutch)
     a<-table(data$clutch)
     rn<-row.names(a)
     ll<-length(rn)
     thetahat<-rep(0,ll)
     

    for(i in 1:ll){
        data<-coots
        csize<-data$csize
        oweight<-csize/2
       for(j in 1:ln){
            if(rn[i]==data$clutch[j]){
                        oweight[j]<-0
                      
                         }
                     }

       for(k in 1:ln){
            
           oweight[k]<-oweight[k]*ll/(ll-1)
         
           }

thetahat[i]<-sum(oweight*volume)/sum(oweight)
vjk<-(ll-1)/ll*sum((thetahat-ybar)^2)

}
list(thetahat,vjk)
}