# Visual normality diagnostics for five simulated samples of size 150.
# For each sample the script draws a three-panel layout (histogram with a
# kernel density curve and rug, then a horizontal boxplot) followed by a
# single-panel normal QQ plot. Samples are random: no seed is set here, so
# every run produces different draws.

# qqPlot() comes from the car package; it must be attached before first use.
# NOTE(review): recent car releases take point-labelling options through
# `id = list(...)`; confirm `id.n` / `id.cex` are still honoured by the
# installed version.
library(car)

#### Normal
# sample from normal distribution
x1 <- rnorm(150, mean = 100, sd = 15)

par(mfrow = c(3, 1))
# Histogram overlaid with kernel density curve
hist(x1, freq = FALSE, breaks = 20)
points(density(x1), type = "l")
rug(x1)
# boxplot
boxplot(x1, horizontal = TRUE)

#### QQ plots
# R base graphics
par(mfrow = c(1, 1))
# plots the data vs their normal scores
qqnorm(x1)
# plots the reference line
qqline(x1)

#### Light-tailed symmetric (Uniform)
# sample from uniform distribution
x2 <- runif(150, min = 50, max = 150)

par(mfrow = c(3, 1))
# Histogram overlaid with kernel density curve
hist(x2, freq = FALSE, breaks = 20)
points(density(x2), type = "l")
rug(x2)
# boxplot
boxplot(x2, horizontal = TRUE)

par(mfrow = c(1, 1))
qqPlot(x2, las = 1, id.n = 0, id.cex = 1, lwd = 1, main = "QQ Plot")

#### Heavy-tailed (fairly) symmetric (Normal-squared)
# sample from normal distribution
x3.temp <- rnorm(150, mean = 0, sd = 1)
# signed square keeps the symmetry while stretching the tails; the result is
# then scaled by 15 and shifted by 100
x3 <- sign(x3.temp) * x3.temp^2 * 15 + 100

par(mfrow = c(3, 1))
# Histogram overlaid with kernel density curve
hist(x3, freq = FALSE, breaks = 20)
points(density(x3), type = "l")
rug(x3)
# boxplot
boxplot(x3, horizontal = TRUE)

par(mfrow = c(1, 1))
qqPlot(x3, las = 1, id.n = 0, id.cex = 1, lwd = 1, main = "QQ Plot")

#### Right-skewed (Exponential)
# sample from exponential distribution
x4 <- rexp(150, rate = 1)

par(mfrow = c(3, 1))
# Histogram overlaid with kernel density curve
hist(x4, freq = FALSE, breaks = 20)
points(density(x4), type = "l")
rug(x4)
# boxplot
boxplot(x4, horizontal = TRUE)

par(mfrow = c(1, 1))
qqPlot(x4, las = 1, id.n = 0, id.cex = 1, lwd = 1, main = "QQ Plot")

#### Left-skewed (Exponential, reversed)
# sample from exponential distribution; subtracting from 15 mirrors the
# right tail into a left tail
x5 <- 15 - rexp(150, rate = 0.5)

par(mfrow = c(3, 1))
# Histogram overlaid with kernel density curve
hist(x5, freq = FALSE, breaks = 20)
points(density(x5), type = "l")
rug(x5)
# boxplot
boxplot(x5, horizontal = TRUE)

par(mfrow = c(1, 1))
qqPlot(x5, las = 1, id.n = 0, id.cex = 1, lwd = 1, main = "QQ Plot")

#### Formal Tests of Normality
# Shapiro-Wilk test on the normal sample
shapiro.test(x1)
# Formal normality tests (continued) and equal-variance tests.
# Each sample is checked with Shapiro-Wilk (stats), Anderson-Darling and
# Cramer-von Mises (nortest); the Lilliefors call is kept commented out.
# Packages are attached once, up front, so that every later call to
# ad.test(), cvm.test(), qqPlot() and leveneTest() resolves.
# NOTE(review): recent car releases take point-labelling options through
# `id = list(...)`; confirm `id.n` / `id.cex` are still honoured by the
# installed version.
library(nortest)
library(car)

# x1 (its Shapiro-Wilk test is run immediately before this point)
ad.test(x1)
# lillie.test(x1)
cvm.test(x1)

# x2
shapiro.test(x2)
ad.test(x2)
# lillie.test(x2)
cvm.test(x2)

# x3
shapiro.test(x3)
ad.test(x3)
# lillie.test(x3)
cvm.test(x3)

# x4
shapiro.test(x4)
ad.test(x4)
# lillie.test(x4)
cvm.test(x4)

# x5
shapiro.test(x5)
ad.test(x5)
# lillie.test(x5)
cvm.test(x5)

#### Example: Paired Differences on Sleep Remedies
# Data and numerical summaries
a <- c(0.7, -1.6, -0.2, -1.2, 0.1, 3.4, 3.7, 0.8, 0.0, 2.0)
b <- c(1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 5.5, 1.6, 4.6, 3.0)
# paired difference, b minus a
d <- b - a
sleep <- data.frame(a, b, d)

# Normality tests
shapiro.test(sleep$d)
ad.test(sleep$d)
# lillie.test(sleep$d)
cvm.test(sleep$d)

# plot of data
par(mfrow = c(3, 1))
# Histogram overlaid with kernel density curve
hist(sleep$d, freq = FALSE, breaks = 20)
points(density(sleep$d), type = "l")
rug(sleep$d)
# boxplot
boxplot(sleep$d, horizontal = TRUE)

# QQ plot
par(mfrow = c(1, 1))
qqPlot(sleep$d, las = 1, id.n = 4, id.cex = 1, lwd = 1, main = "QQ Plot")

#### Example: Androstenedione Levels
# Data and numerical summaries
men <- c(
  217, 123, 80, 140, 115, 135, 59, 126, 70, 63, 147, 122, 108, 70
)
women <- c(
  84, 87, 77, 84, 73, 66, 70, 35, 77, 73, 56, 112, 56, 84, 80, 101, 66, 84
)
level <- c(men, women)
sex <- rep(c("men", "women"), times = c(length(men), length(women)))
# stringsAsFactors = TRUE keeps `sex` a factor column on R >= 4.0 as well, so
# the grouping variable handed to the variance tests is a factor as it was
# under the pre-4.0 default
andro <- data.frame(level, sex, stringsAsFactors = TRUE)

# Normality tests, men
shapiro.test(men)
ad.test(men)
# lillie.test(men)
cvm.test(men)

# Normality tests, women
shapiro.test(women)
ad.test(women)
# lillie.test(women)
cvm.test(women)

#### Testing Equal Population Variances
# numerical summaries
c(mean(men), mean(women), sd(men), sd(women))
c(IQR(men), IQR(women), length(men), length(women))

## Test equal variance
# assumes populations are normal
bartlett.test(level ~ sex, data = andro)
# does not assume normality, requires car package
leveneTest(level ~ sex, data = andro)
# nonparametric test
fligner.test(level ~ sex, data = andro)