Jake Heare — Feb 11, 2014, 10:36 AM
#Check that the most recent version of R insatlled for Macs v.2.15.1
#check and set your working directory so that you can load files without designating the file location. I just open up the R script I want from the folder that I will be working in that also contains my data
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"
#load library
library( "DESeq" )
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following object is masked from 'package:stats':
xtabs
The following objects are masked from 'package:base':
anyDuplicated, append, as.data.frame, as.vector, cbind,
colnames, duplicated, eval, evalq, Filter, Find, get,
intersect, is.unsorted, lapply, Map, mapply, match, mget,
order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unlist
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Loading required package: locfit
locfit 1.5-9.1 2013-03-22
Loading required package: lattice
Welcome to 'DESeq'. For improved performance, usability and
functionality, please consider migrating to 'DESeq2'.
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"
setwd("C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq")
govsgi = read.csv("GonadvGill.csv")
write.table(govsgi,"govsgi.txt", sep="\t")
#read data into DESeq, needs to be in .txt form
CountData<- read.table("govsgi5.txt", row.names=1, header=TRUE, stringsAsFactors = F,)
# check that it is correct
head (CountData)
Go Gi
1 0 0
2 44 360
3 0 24
4 25 20
5 1682 2
6 4274 13194
#Check to see how R has interpreted Values
sapply(CountData,FUN=class)
Go Gi
"integer" "integer"
# assign factors
Treatment <- factor(c( "Go","Gi" ))
# create a new data set
cds <- newCountDataSet( CountData, Treatment )
# Estimate effective library size
LibrarySize <- estimateSizeFactors( cds )
sizeFactors( LibrarySize )
Go Gi
0.3764 2.6569
#estimate Dispersons without replication
Dispersons <- estimateDispersions( LibrarySize, method="blind", fitType = c ("local"), sharingMode="fit-only" )
# calculate differential gene expression
DE <- nbinomTest( Dispersons, "Go", "Gi" )
#check data
head (DE)
id baseMean baseMeanA baseMeanB foldChange log2FoldChange pval padj
1 1 0.000 0.00 0.0000 NaN NaN NA NA
2 2 126.200 116.90 135.4950 1.1590205 0.2129 1.000000 1.000
3 3 4.516 0.00 9.0330 Inf Inf 0.731067 1.000
4 4 36.975 66.42 7.5275 0.1133264 -3.1414 0.387010 1.000
5 5 2234.851 4468.95 0.7527 0.0001684 -12.5355 0.001703 0.467
6 6 8160.795 11355.70 4965.8903 0.4373038 -1.1933 0.675950 1.000
#plot log2 fold changes against base means
plotDE <- function (DE)
plot(
DE$baseMean,
DE$log2FoldChange,
log="x", pch=20, cex=.3,
col = ifelse (DE$padj < .05, "red", "black"))
plotDE(DE)
Warning: 2208 x values <= 0 omitted from logarithmic plot
#plot histogram of p-values
hist(DE$pval, breaks=100, col="skyblue", border="slateblue", main="")
#export data into a tab deliminted txt file
write.table(DE, "GovsGi_DESeq.txt", row.names = FALSE, sep="\t")