################################################################
#############chapter 6: descriptive statistics##################
################################################################

############### Earthquake data ############################
# Sample of 20 observations, entered by hand. Lines starting with "##" are
# console output pasted in for reference.
earthquake <- c(
  18, 14, 10, 15, 8, 15, 6, 11, 8, 7,
  12, 11, 23, 16, 15, 25, 22, 20, 16, 23
)
earthquake
##  [1] 18 14 10 15  8 15  6 11  8  7 12 11 23 16 15 25 22 20 16 23
mean(earthquake)
## [1] 14.75
sort(earthquake) # sort data from smallest to largest
##  [1]  6  7  8  8 10 11 11 12 14 15 15 15 16 16 18 20 22 23 23 25
median(earthquake)
## [1] 15
table(earthquake) # frequency table; the mode is 15, since it appears 3 times
## earthquake
##  6  7  8 10 11 12 14 15 16 18 20 22 23 25 
##  1  1  2  1  2  1  1  3  2  1  1  1  2  1
sum(table(earthquake)) # number of observations in the vector earthquake
## [1] 20
var(earthquake)
## [1] 32.72368
# Standard deviation as the square root of the sample variance. It is stored
# under its own name so that the base function sd() is not shadowed by a
# numeric variable of the same name.
earthquake_sd <- sqrt(var(earthquake))
earthquake_sd
## [1] 5.720462
fivenum(earthquake) # minimum, lower hinge, median, upper hinge, maximum
## [1]  6.0 10.5 15.0 19.0 25.0
hist(earthquake, breaks = 5)

boxplot(earthquake)

# Index plot: observation number against observed value. seq_along() follows
# the length of the data instead of hard-coding 20. The variable keeps the
# name x because plot() uses it as the default axis label.
x <- seq_along(earthquake)
plot(x, earthquake)

############### Waiting time data ############################
# Sample of 20 waiting times, entered by hand.
wt <- c(
  16, 45, 16, 54, 15, 49, 12, 54, 91, 21,
  33, 20, 27, 53, 24, 46, 39, 31, 41, 27
)
sort(wt)
##  [1] 12 15 16 16 20 21 24 27 27 31 33 39 41 45 46 49 53 54 54 91
# Five-number summary, computed once and reused below:
# minimum, lower hinge, median, upper hinge, maximum.
wt_summary <- fivenum(wt)
wt_summary
## [1] 12.0 20.5 32.0 47.5 91.0
Q1 <- wt_summary[2] # lower hinge, used as the first quartile
Q1
## [1] 20.5
Q3 <- wt_summary[4] # upper hinge, used as the third quartile
Q3
## [1] 47.5
# Interquartile range based on the hinges. It is deliberately not stored in a
# variable called IQR, which would shadow stats::IQR(); note that stats::IQR()
# uses a different quantile definition by default and need not equal this value.
iqr <- Q3 - Q1
iqr
## [1] 27
# Outlier fences: values outside [Q1 - 1.5 * iqr, Q3 + 1.5 * iqr] are flagged
# as outliers; values beyond Q3 + 3 * iqr are flagged as extreme outliers.
Q1 - 1.5 * iqr
## [1] -20
Q3 + 1.5 * iqr
## [1] 88
Q3 + 3 * iqr
## [1] 128.5
############### Stat 345 test 1 data ############################
# Test 1 scores, entered by hand (one value per student).
test1 <- c(
  61, 61, 82, 44, 94, 75, 95, 89, 90, 78, 87, 55, 79, 91, 74, 51, 89, 36, 57,
  73, 49, 90, 83, 66, 78, 100, 61, 80, 61, 94, 61, 88, 92,
  64, 97, 90, 81, 40, 67, 78, 50, 88,
  64, 100
)
#test1 <- read.table("~/teaching/stat345/S2019/notes/lecture notes/test12019",header=TRUE)
table(test1) # frequency table; the mode is 61, since it appears 5 times
## test1
##  36  40  44  49  50  51  55  57  61  64  66  67  73  74  75  78  79  80 
##   1   1   1   1   1   1   1   1   5   2   1   1   1   1   1   3   1   1 
##  81  82  83  87  88  89  90  91  92  94  95  97 100 
##   1   1   1   1   2   2   3   1   1   2   1   1   2
sum(table(test1)) # number of observations, 44 students taking the test
## [1] 44
xbar <- mean(test1) # sample mean
xbar
## [1] 74.61364
median(test1)
## [1] 78
v <- var(test1) # sample variance of the test 1 scores
v
## [1] 298.6147
s <- sd(test1) # sample standard deviation
s
## [1] 17.28047
fivenum(test1) # minimum, lower hinge, median, upper hinge, maximum
## [1]  36.0  61.0  78.0  89.5 100.0
## Graphic summaries

stem(test1) # stem and leaf plot
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##    3 | 6
##    4 | 049
##    5 | 0157
##    6 | 111114467
##    7 | 3458889
##    8 | 012378899
##    9 | 000124457
##   10 | 00
hist(test1)

boxplot(test1)

## Check whether the data are roughly normal

qqnorm(test1)
qqline(test1) # the plot gives an indication of non-normality

## Construct a 95% confidence interval for the mean using the t distribution.
## lower.tail = FALSE makes qt() return the upper-tail critical value, which
## matches Table V in Appendix A of the textbook.
## The sample size is taken from the data rather than hard-coded as 44, so the
## interval stays correct if scores are added or removed.
n_test1 <- length(test1)
talpha <- qt(p = 0.025, df = n_test1 - 1, lower.tail = FALSE)
talpha
## [1] 2.016692
margin <- talpha * s / sqrt(n_test1) # half-width of the interval
UB <- xbar + margin # upper bound of 95% CI
LB <- xbar - margin # lower bound of 95% CI
UB
## [1] 79.86738
LB
## [1] 69.35989
###### problems in ch06 hw ##################
# NOTE(review): the path below points into the author's home directory;
# adjust it to wherever the chapter 6 data set lives on your machine.
Applied7eChap06 <- read.csv(
  "~/teaching/stat345/S2019/datasets/ds_txt/Applied7eChap06.csv",
  header = TRUE
)
Applied7eChap06$Ex6.44
##   [1]  450  450  473  507  457  452  453 1215 1256 1145 1085 1066 1111 1364
##  [15] 1254 1396 1575 1617 1733 2753 3186 3227 3469 1911 2588 2635 2725   NA
##  [29]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##  [43]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##  [57]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##  [71]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##  [85]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
##  [99]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## [113]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
# The column is padded with NA after its recorded values (see the output
# above). Drop the missing entries instead of hard-coding the count of 27,
# so the extraction still works if the number of recorded values changes.
ch06.44 <- Applied7eChap06$Ex6.44[!is.na(Applied7eChap06$Ex6.44)]
ch06.44
##  [1]  450  450  473  507  457  452  453 1215 1256 1145 1085 1066 1111 1364
## [15] 1254 1396 1575 1617 1733 2753 3186 3227 3469 1911 2588 2635 2725
stem(ch06.44)
## 
##   The decimal point is 3 digit(s) to the right of the |
## 
##   0 | 
##   0 | 5555555
##   1 | 111123344
##   1 | 6679
##   2 | 
##   2 | 6678
##   3 | 22
##   3 | 5
# Simple dot plot. The x-axis label contains a line break, written here as an
# explicit "\n" escape (same string as a raw line break inside the quotes).
dotchart(
  ch06.44,
  main = "Dot plot of measurements on NbOCl",
  xlab = "gram-mole\nper liter"
)