Jake Heare — Feb 6, 2014, 1:36 PM
#Check that the most recent version of R insatlled for Macs v.2.15.1
#check and set your working directory so that you can load files without designating the file location. I just open up the R script I want from the folder that I will be working in that also contains my data
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"
#load library
library( "DESeq" )
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following object is masked from 'package:stats':
xtabs
The following objects are masked from 'package:base':
anyDuplicated, append, as.data.frame, as.vector, cbind,
colnames, duplicated, eval, evalq, Filter, Find, get,
intersect, is.unsorted, lapply, Map, mapply, match, mget,
order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unlist
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Loading required package: locfit
locfit 1.5-9.1 2013-03-22
Loading required package: lattice
Welcome to 'DESeq'. For improved performance, usability and
functionality, please consider migrating to 'DESeq2'.
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"
setwd("C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq")
govgi = read.csv("GonadvsGill.csv")
write.table(govgi,"govgi.txt", sep="\t")
#read data into DESeq, needs to be in .txt form
CountData<- read.table("govgi8.txt", row.names=1, header=TRUE, stringsAsFactors = F,)
# check that it is correct
head (CountData)
Go Gi
1 0 0
2 0 0
3 0 0
4 2 0
5 3 47
6 1 9
#Check to see how R has interpreted Values
sapply(CountData,FUN=class)
Go Gi
"integer" "integer"
# assign factors
Treatment <- factor(c( "Go","Gi" ))
# create a new data set
cds <- newCountDataSet( CountData, Treatment )
# Estimate effective library size
LibrarySize <- estimateSizeFactors( cds )
sizeFactors( LibrarySize )
Go Gi
0.4315 2.3177
#estimate Dispersons without replication
Dispersons <- estimateDispersions( LibrarySize, method="blind", sharingMode="fit-only" )
Warning: glm.fit: algorithm did not converge
# calculate differential gene expression
DE <- nbinomTest( Dispersons, "Go", "Gi" )
#check data
head (DE)
id baseMean baseMeanA baseMeanB foldChange log2FoldChange pval padj
1 1 0.000 0.000 0.000 NaN NaN NA NA
2 2 0.000 0.000 0.000 NaN NaN NA NA
3 3 0.000 0.000 0.000 NaN NaN NA NA
4 4 2.318 4.635 0.000 0.000 -Inf 0.6526 1
5 5 13.616 6.953 20.278 2.916 1.5442 0.9771 1
6 6 3.100 2.318 3.883 1.675 0.7445 1.0000 1
#plot log2 fold changes against base means
plotDE <- function (DE)
plot(
DE$baseMean,
DE$log2FoldChange,
log="x", pch=20, cex=.3,
col = ifelse (DE$padj < .05, "red", "black"))
plotDE(DE)
Warning: 5191 x values <= 0 omitted from logarithmic plot
#plot histogram of p-values
hist(DE$pval, breaks=100, col="skyblue", border="slateblue", main="")
#export data into a tab deliminted txt file
write.table(DE, "GovGi_DESeq.txt", row.names = FALSE, sep="\t")