d2 <- read.table("http://math.unm.edu/~luyan/ADA117/acid.txt",header=T)
head(d2)
## conc exper
## 1 0.123 Acid1
## 2 0.109 Acid1
## 3 0.110 Acid1
## 4 0.109 Acid1
## 5 0.112 Acid1
## 6 0.109 Acid1
nrow(d2)
## [1] 161
dnew1<-d2[which(d2[,2]=="Acid1"),]
dnew2<-d2[which(d2[,2]=="Acid2"),]
nrow(dnew1)
## [1] 124
nrow(dnew2)
## [1] 37
#### Visual comparison of whether sampling distribution is close to Normal via Bootstrap
# a function to compare the bootstrap sampling distribution
# of the difference of means from two samples with
# a normal distribution with mean and SEM estimated from the data
bs.two.samp.diff.dist <- function(dat1, dat2, N = 1e4) {
n1 <- length(dat1);
n2 <- length(dat2);
# resample from data
sam1 <- matrix(sample(dat1, size = N * n1, replace = TRUE), ncol=N);
sam2 <- matrix(sample(dat2, size = N * n2, replace = TRUE), ncol=N);
# calculate the means and take difference between populations
sam1.mean <- colMeans(sam1);
sam2.mean <- colMeans(sam2);
diff.mean <- sam1.mean - sam2.mean;
# save par() settings
old.par <- par(no.readonly = TRUE)
# make smaller margins
par(mfrow=c(3,1), mar=c(3,2,2,1), oma=c(1,1,1,1))
# Histogram overlaid with kernel density curve
hist(dat1, freq = FALSE, breaks = 6
, main = paste("Sample 1", "\n"
, "n =", n1
, ", mean =", signif(mean(dat1), digits = 5)
, ", sd =", signif(sd(dat1), digits = 5))
, xlim = range(c(dat1, dat2)))
points(density(dat1), type = "l")
rug(dat1)
hist(dat2, freq = FALSE, breaks = 6
, main = paste("Sample 2", "\n"
, "n =", n2
, ", mean =", signif(mean(dat2), digits = 5)
, ", sd =", signif(sd(dat2), digits = 5))
, xlim = range(c(dat1, dat2)))
points(density(dat2), type = "l")
rug(dat2)
hist(diff.mean, freq = FALSE, breaks = 25
, main = paste("Bootstrap sampling distribution of the difference in means", "\n"
, "mean =", signif(mean(diff.mean), digits = 5)
, ", se =", signif(sd(diff.mean), digits = 5)))
# overlay a density curve for the sample means
points(density(diff.mean), type = "l")
# overlay a normal distribution, bold and red
x <- seq(min(diff.mean), max(diff.mean), length = 1000)
points(x, dnorm(x, mean = mean(diff.mean), sd = sd(diff.mean))
, type = "l", lwd = 2, col = "red")
# place a rug of points under the plot
rug(diff.mean)
# restore par() settings
par(old.par)
}
bs.two.samp.diff.dist(dnew1$conc, dnew2$conc)