{ "contents" : "#=====Lecture 10 r code: linear models\nmetals <- read.table(\"http://lib.stat.cmu.edu/DASL/Datafiles/Pottery.html\", header=T)\nhead(metals, n=3)\ntable(metals$Site) #how many samples from each site\ntapply(X=metals$Fe, INDEX=metals$Site, FUN=mean)\n\n#now run the ANOVA model \nplot(Fe ~ Site, data=metals) #try the default plot\nFe.aov <- aov(Fe~Site, data=metals)\nsummary(Fe.aov)\n\n#alternative method of running ANOVA\nFe.lm <- lm(Fe~Site, data=metals)\nanova(Fe.lm)\nsummary(Fe.lm) #more output from lm()\n\n#Tukeys honest significant difference test\nFe.aov <- aov(Fe~Site, data=metals)\nTukeyHSD(Fe.aov)\nplot(TukeyHSD(Fe.aov))\n\n#Tests\ntable(metals$Site)\n\n#=======In-class exercise 1\nmetals <- read.table(\"metals.txt\", header=T)\nFe.lm <- lm(Fe~Site, data=metals)\nnames(Fe.lm) #find out how to get the residuals\nFe.lm$residuals\n\n#use a qq-plot to see if it is normal\nqqnorm(Fe.lm$residuals)\nqqline(Fe.lm$residuals) \n\nlibrary(MASS) \n#Kolmogorov-Smirnov test\nsigma <- summary(Fe.lm)$sigma\n?ks.test\n\n#here the ... means parameters of cumulative distribution function\n#i.e. pnorm(mean=0, sd=sigma)\nks.test(x=Fe.lm$residuals, y=pnorm, mean=0, sd=sigma)\n\n#shapiro test\nshapiro.test(Fe.lm$residuals) \n\n#equal group variances?\nboxplot(metals$Fe~metals$Site)\nboxplot(Fe~Site, data=metals)\n\n#Bartlett's test\nbartlett.test(metals$Fe, metals$Site) \n\n#Fligner test\nfligner.test(metals$Fe, metals$Site) \n\n#Conclusion: \n#cannot reject assumption of normality, \n#or assumption of equal variance\n\n#=========ANCOVA/linear regression\nmarmot <- read.table(\"Data//marmot.txt\", header=T)\nhead(marmot)\n\n#interaction plots\ninteraction.plot(x.factor=marmot$loc, trace.factor=marmot$type, \n response=marmot$len, trace.label = \"Type\",\n xlab = \"Location\", lwd=2, \n ylab = \"Mean of Marmot Whistle Length\")\n\n#plot with different colors for different threat types\nplot(marmot$dist, marmot$len, xlab = \"Distance from challenge\", \n ylab = \"Length of whistles\", type = \"n\") #\"n\" means \"do not plot\"!\npoints(marmot$dist[marmot$type == \"Dog\"], \n marmot$len[marmot$type == \"Dog\"],pch = 17, col = \"blue\")\npoints(marmot$dist[marmot$type == \"Human\"],\n marmot$len[marmot$type == \"Human\"],pch = 18, col = \"red\")\npoints(marmot$dist[marmot$type == \"RCPlane\"], \n marmot$len[marmot$type == \"RCPlane\"], pch = 19, col = \"green\")\nlegend(\"bottomleft\", bty = 'n', levels(marmot$type), \n col = c(\"blue\", \"red\", \"green\"), pch = 17:19)\n\n#model with interactions\ninteractionModel <- lm(len ~ loc + type*dist, data=marmot)\ninteractionModel\nsummary(interactionModel)\n\n#components of the summary output stored in a list\nnames(interactionModel)\n\n\n#what the formula of the call was\ninteractionModel$call\n\n#ANOVA partial f-test\ninteractionModel <- lm(len ~ loc + type*dist, data=marmot)\nnonInteractionModel <- lm(len ~ loc + type + dist, data = marmot)\nanova(nonInteractionModel,interactionModel)\n\n#=============Hands-on exercise 2\nAIC(nonInteractionModel, interactionModel)\n#or one at a time\nAIC(nonInteractionModel)\nAIC(interactionModel)\n\nlogLik(nonInteractionModel)\nlogLik(interactionModel)\n\nn <- nrow(marmot) #number of data points\n\n#number of fitted coefficients PLUS variance (+1)\n#that is the number of parameters in the model\n#also logLik returns \"df\" which is number of pars\np1 <- length(nonInteractionModel$coefficients) + 1 \np2 <- length(interactionModel$coefficients) + 1 \np1\np2\nAIC.nonIM <- -2*logLik(nonInteractionModel)+2*p1\nAIC.IM <- -2*logLik(interactionModel)+2*p2\nAIC.nonIM\nAIC.IM #lower AIC = better model\n \n#AIC corrected\nAICc.nonIM <- -2*logLik(nonInteractionModel)+\n 2*p1*(n/(n-p1-1))\nAICc.IM <- -2*logLik(interactionModel)+\n 2*p2*(n/(n-p2-1))\nAICc.nonIM\nAICc.IM #lower AIC = better model\n\n#still favor the model with the interaction term\n#since it has lower AICc\n\n\n#=========checking model assumptions\nplot(interactionModel)\nplot(interactionModel, which=2)\n\n#confidence intervals for parameters\nround(confint(interactionModel),6)\n\n# example making predictions\n\n( newobs <- data.frame(rep = c(3, 1), dist = c(10, 13), type = rep(\"Dog\", 2), loc = rep(\"A\", 2)) )\n\npredict(interactionModel, newobs, interval = \"confidence\")\npredict(interactionModel, newobs, interval = \"prediction\")\n\n\n\n", "created" : 1414521818096.000, "dirty" : false, "encoding" : "UTF-8", "folds" : "", "hash" : "3037704632", "id" : "943FAA3D", "lastKnownWriteTime" : 1414532325, "path" : "~/R/IntroR/10 Lecture r code MLM.r", "project_path" : "10 Lecture r code MLM.r", "properties" : { }, "source_on_save" : false, "type" : "r_source" }