{ "contents" : "#=====Lecture 9 r code: statistics\n\n\n#===== Plot the standard normal curve\nx <- seq(-4,4,0.001)\nplot(x=x, y=dnorm(x), type=\"l\", lwd=2, col=\"blue\",yaxs=\"i\", ylim=c(0,0.45),\n xlab=\"Value of x\", ylab=\"Density\")\n\n# create the explanatory plot\npar(mfrow=c(2,1))\nx <- seq(-4,4,0.001)\nplot(x=x, y=dnorm(x), type=\"l\", lwd=2, col=\"blue\",yaxs=\"i\",\n ylim=c(0,0.45),xlab=\"Value of x\", ylab=\"Density\", xaxt=\"n\")\nlines(x=x[x<(-1.96)], y=dnorm(x[x<(-1.96)]),\n type=\"h\", col=\"#77777777\")\naxis(at=c(-4,-1.96,0,1.96, 4),lab=c(\"-4\",\"-x\", \"0\",\"+x\", \"4\"),side=1)\n\nplot(x=x, y=dnorm(x), type=\"l\", lwd=2, col=\"blue\",yaxs=\"i\",\n ylim=c(0,0.45),xlab=\"Value of x\", ylab=\"Density\", xaxt=\"n\")\nlines(x=x[x<1.96], y=dnorm(x[x<1.96]),\n type=\"h\", col=\"#77777777\")\naxis(at=c(-4,-1.96,0,1.96, 4),lab=c(\"-4\",\"-x\", \"0\",\"+x\", \"4\"),side=1)\n\n## show qnorm\nquants <- qnorm(c(0.01,0.025,0.05,0.95,0.975,0.99))\nround(quants,2)\n\n## show pnorm\npnorm(quants)\npnorm(-2.33)\npnorm(-2.33, lower.tail=F)\n\npnorm(-2.33) + pnorm(-2.33, lower.tail=F)\n\n# demonstrate dnorm and rnorm\ndnorm(quants)\nrnorm(n=10)\n\n\n\n#===random numbers\n?.Random.seed\n?set.seed\n.Random.seed[1:4]\nrnorm(1)\n.Random.seed[1:4]\nrnorm(1)\n.Random.seed[1:4]\n\nset.seed(100)\nrnorm(3)\nrnorm(3)\nset.seed(100)\nrnorm(3)\nrnorm(3)\n\n2^31-1\n-2^31\nrnorm(5)\n\n#=====using the sample function\n#roll some dice 10 times\nsample(1:6,size=10,replace=T)\n\n#flip a coin 10 times\nsample(c(\"H\",\"T\"), size=10, replace=T)\n\n#select five cards from a deck\ncards <- paste(rep(c(\"A\",2:10,\"J\",\"Q\",\"K\"),4),\n c(\"Club\",\"Diamond\",\"Heart\",\"Spade\"))\nsort(cards) #check that this worked! 
\nsample(cards,5)\n\n#times out of 10000 you select 0,1,2,3,4,5 hearts\nx <- vector(length=10000)\nfor (i in 1:10000) {\n x[i] <- length(grep(\"Heart\", sample(cards,5)))\n}\ntable(x)\n#Note: grep searches for matches to a regular expression and returns the indices of the matching elements; length() counts them.\n\n#first two cards both Aces\nx <- vector(length=10000)\nfor (i in 1:10000) {\n x[i] <- 2==length(grep(\"A\", sample(cards,2)))\n}\ntable(x)\n\n#4 aces out of 7 cards (Texas hold 'em)\nx <- vector(length=100000)\nfor (i in 1:100000) {\n x[i] <- length(grep(\"A\", sample(cards,7)))>=4\n}\ntable(x)\n\n#=======Hands-on exercise 1\nrequire(plyr)\nrequire(TeachingDemos)\nFbomb<-rnorm(100, mean=80, sd=10)\n# NOTE(review): && is scalar, not elementwise (an error for length > 1 input since R 4.3), and no value is both >100 and <60; an elementwise | was probably intended - confirm\nf<-length(which(Fbomb>100 && Fbomb<60))/length(Fbomb)\ngib<-pnorm(0.24, mean=0,sd=1, lower.tail=T,log.p=F)\ngib\n\n#=====exploratory data analysis\nsummary(iris)\nfivenum(iris[,3]) #min, lower quart, median, upper q, max for petal length\nsd(iris[,3]) # sd for petal length\napply(iris[,1:4], 2, sd, na.rm=T) # sd for each variable, omit spp.\napply(iris[,1:4], 2, range, na.rm=T) #min and max for each variable, omit spp.\n\nboxplot(iris)\n\npairs(iris[,1:4])\n\npairs(iris[,1:4], main = \"Edgar Anderson's Iris Data\", pch = 19,\n col = rep(c(\"#FF000055\", \"#00FF0055\", \"#0000FF55\"), c(50,50,50)))\npairs(iris[,1:4], main = \"Edgar Anderson's Iris Data\", pch = 21,\n bg = rep(c(\"red\", \"green3\", \"blue\"), table(iris$Species)))\n\n#====histogram\nhist(iris$Petal.Length, main=\"\", col=\"gray\", xlab=\"Petal length (cm)\")\ntable(iris$Species)\ntable(iris$Petal.Length)\npar(mfrow=c(1,3))\nhist(iris[iris$Species==\"setosa\",]$Petal.Length, ylim=c(0,50),\n breaks=seq(1,7,0.5), main=\"setosa\", col=\"gray\", xlab=\"Petal length (cm)\")\nhist(iris[iris$Species==\"versicolor\",]$Petal.Length, ylim=c(0,50), \n breaks=seq(1,7,0.5), main=\"versicolor\", col=\"gray\", xlab=\"Petal length (cm)\")\nhist(iris[iris$Species==\"virginica\",]$Petal.Length, ylim=c(0,50),\n breaks=seq(1,7,0.5), main=\"virginica\", 
col=\"gray\", xlab=\"Petal length (cm)\")\n\n#==adding density curves\npar(mfrow=c(1,1))\nhist(iris$Petal.Length, main=\"\", col=\"gray\", xlab=\"Petal length (cm)\",\n freq=F)\nlines(density(iris$Petal.Length), lwd=2, col=\"red\")\nlines(density(iris$Petal.Length,adjust=0.4), lwd=2, col=\"blue\")\nlines(density(iris$Petal.Length,adjust=2), lwd=2, col=\"green3\")\n\n\n#=====using t-test\nmethodA <- c(79.982, 80.041, 80.018, 80.041, 80.03, 80.029, 80.038, 79.968, 80.049, 80.029, 80.019, 80.002, 80.022)\nmethodB <- c(80.02, 79.939, 79.98, 79.971, 79.97, 80.029, 79.952, 79.968)\n\n#briefly examine assumptions\nboxplot(methodA, methodB, names = c(\"A\", \"B\"))\npar(mfrow = c(1,2))\nqqnorm(methodA)\nqqline(methodA)\nqqnorm(methodB); qqline(methodB)\n\nt.test(methodA, methodB)\nresultsAB <- t.test(methodA, methodB)\nnames(resultsAB)\nresultsAB$p.value\n\nvar(methodA)\nvar(methodB)\n\n#test of equal variances\nvar.test(methodA, methodB)\n\n#use equal variances\nt.test(methodA, methodB, var.equal = TRUE)\n\n#Mann-Whitney test\nwilcox.test(methodA, methodB)\n\n#======Hands-on Exercise 2\nfarb<-rnorm(30, mean=0, sd=1)\nfarg<-rexp(30, rate=1)\nfarp<-rt(30,df=Inf)\nfarn<-rcauchy(30, location=0,scale=1)\n\nqqnorm(farb)\nqqline(farb)\nqqnorm(farg)\nqqline(farg)\nqqnorm(farp)\nqqline(farp)\nqqnorm(farn)\nqqline(farn)\n", "created" : 1414089013008.000, "dirty" : false, "encoding" : "UTF-8", "folds" : "", "hash" : "4088200032", "id" : "5F5BD65D", "lastKnownWriteTime" : 1414097104, "path" : "~/R/IntroR/9 Lectre r code Clean.r", "project_path" : "9 Lectre r code Clean.r", "properties" : { }, "source_on_save" : false, "type" : "r_source" }