DESeqGovsGiUpdate

Jake Heare — Feb 11, 2014, 10:36 AM

#Check that the most recent version of R insatlled for Macs v.2.15.1
#check and set your working directory so that you can load files without designating the file location. I just open up the R script I want from the folder that I will be working in that also contains my data
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"


#load library
library( "DESeq" )
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'

The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from 'package:stats':

    xtabs

The following objects are masked from 'package:base':

    anyDuplicated, append, as.data.frame, as.vector, cbind,
    colnames, duplicated, eval, evalq, Filter, Find, get,
    intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unlist

Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: locfit
locfit 1.5-9.1   2013-03-22
Loading required package: lattice
    Welcome to 'DESeq'. For improved performance, usability and
    functionality, please consider migrating to 'DESeq2'.
getwd()
[1] "C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq"
setwd("C:/Users/Christine Savolainen/Desktop/Bio Informatics/DESeq/DESeq")
govsgi = read.csv("GonadvGill.csv")
write.table(govsgi,"govsgi.txt", sep="\t")


#read data into DESeq, needs to be in .txt form
CountData<- read.table("govsgi5.txt", row.names=1, header=TRUE, stringsAsFactors = F,)

# check that it is correct 
head (CountData)
    Go    Gi
1    0     0
2   44   360
3    0    24
4   25    20
5 1682     2
6 4274 13194

#Check to see how R has interpreted Values
sapply(CountData,FUN=class)
       Go        Gi 
"integer" "integer" 


# assign factors
Treatment <- factor(c( "Go","Gi" )) 

# create a new data set
cds <- newCountDataSet( CountData, Treatment )

# Estimate effective library size
LibrarySize <- estimateSizeFactors( cds )
sizeFactors( LibrarySize )
    Go     Gi 
0.3764 2.6569 

#estimate Dispersons without replication
Dispersons <- estimateDispersions( LibrarySize, method="blind", fitType = c ("local"), sharingMode="fit-only" )

# calculate differential gene expression 
DE <- nbinomTest( Dispersons, "Go", "Gi" )

#check data
head (DE)
  id baseMean baseMeanA baseMeanB foldChange log2FoldChange     pval  padj
1  1    0.000      0.00    0.0000        NaN            NaN       NA    NA
2  2  126.200    116.90  135.4950  1.1590205         0.2129 1.000000 1.000
3  3    4.516      0.00    9.0330        Inf            Inf 0.731067 1.000
4  4   36.975     66.42    7.5275  0.1133264        -3.1414 0.387010 1.000
5  5 2234.851   4468.95    0.7527  0.0001684       -12.5355 0.001703 0.467
6  6 8160.795  11355.70 4965.8903  0.4373038        -1.1933 0.675950 1.000

#plot log2 fold changes against base means 
plotDE <- function (DE)
    plot(
        DE$baseMean, 
        DE$log2FoldChange,
        log="x", pch=20, cex=.3,
        col = ifelse (DE$padj < .05, "red", "black"))

plotDE(DE)      
Warning: 2208 x values <= 0 omitted from logarithmic plot

plot of chunk unnamed-chunk-1


#plot histogram of p-values
hist(DE$pval, breaks=100, col="skyblue", border="slateblue", main="")

plot of chunk unnamed-chunk-1


#export data into a tab deliminted txt file 
write.table(DE, "GovsGi_DESeq.txt", row.names = FALSE, sep="\t")