DESeqGovGi

Jake Heare — Feb 6, 2014, 1:36 PM

#Check that the most recent version of R insatlled for Macs v.2.15.1
#check and set your working directory so that you can load files without designating the file location. I just open up the R script I want from the folder that I will be working in that also contains my data
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"


#load library
library( "DESeq" )
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from 'package:stats':

    xtabs

The following objects are masked from 'package:base':

    anyDuplicated, append, as.data.frame, as.vector, cbind,
    colnames, duplicated, eval, evalq, Filter, Find, get,
    intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unlist

Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: locfit
locfit 1.5-9.1   2013-03-22
Loading required package: lattice
    Welcome to 'DESeq'. For improved performance, usability and
    functionality, please consider migrating to 'DESeq2'.
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"
setwd("C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq")
govgi = read.csv("GonadvsGill.csv")
write.table(govgi,"govgi.txt", sep="\t")


#read data into DESeq, needs to be in .txt form
CountData<- read.table("govgi8.txt", row.names=1, header=TRUE, stringsAsFactors = F,)

# check that it is correct 
head (CountData)
  Go Gi
1  0  0
2  0  0
3  0  0
4  2  0
5  3 47
6  1  9

#Check to see how R has interpreted Values
sapply(CountData,FUN=class)
       Go        Gi 
"integer" "integer" 


# assign factors
Treatment <- factor(c( "Go","Gi" )) 

# create a new data set
cds <- newCountDataSet( CountData, Treatment )

# Estimate effective library size
LibrarySize <- estimateSizeFactors( cds )
sizeFactors( LibrarySize )
    Go     Gi 
0.4315 2.3177 

#estimate Dispersons without replication
Dispersons <- estimateDispersions( LibrarySize, method="blind", sharingMode="fit-only" )
Warning: glm.fit: algorithm did not converge

# calculate differential gene expression 
DE <- nbinomTest( Dispersons, "Go", "Gi" )

#check data
head (DE)
  id baseMean baseMeanA baseMeanB foldChange log2FoldChange   pval padj
1  1    0.000     0.000     0.000        NaN            NaN     NA   NA
2  2    0.000     0.000     0.000        NaN            NaN     NA   NA
3  3    0.000     0.000     0.000        NaN            NaN     NA   NA
4  4    2.318     4.635     0.000      0.000           -Inf 0.6526    1
5  5   13.616     6.953    20.278      2.916         1.5442 0.9771    1
6  6    3.100     2.318     3.883      1.675         0.7445 1.0000    1

#plot log2 fold changes against base means 
plotDE <- function (DE)
    plot(
        DE$baseMean, 
        DE$log2FoldChange,
        log="x", pch=20, cex=.3,
        col = ifelse (DE$padj < .05, "red", "black"))

plotDE(DE)      
Warning: 5191 x values <= 0 omitted from logarithmic plot

plot of chunk unnamed-chunk-1


#plot histogram of p-values
hist(DE$pval, breaks=100, col="skyblue", border="slateblue", main="")

plot of chunk unnamed-chunk-1


#export data into a tab deliminted txt file 
write.table(DE, "GovGi_DESeq.txt", row.names = FALSE, sep="\t")