# On Sep 17, 2013, at 4:01 PM, Mackenzie Gavery wrote:
# 
# > Hi Jeff,
# >
# > Thanks for meeting with us.  Here is a recap of the design of the array that we discussed today.
# >
# > There were 3 competitive arrays performed:
# > A01. Input DNA from estrogen treated sample (pooled DNA) versus Input DNA from control sample (pooled DNA)
# > A02. Methylation Enriched estrogen treated sample versus Methylation Enriched control sample
# > A03. Dye swap of the methylation enriched samples in array A02
# >
# > The genome information for the Pacific oyster (scaffolds and gene location) can be found here: 
# > FASTA file (11969 scaffolds)
# > annotation of gene location on scaffolds as gff
# >
# > Thanks also for looking into how many arrays we have remaining.  Please let me know if you need anything else or have any questions.
# >
# > Best,
# >
# > Mackenzie
#

library(Ringo)
#generate annotation object
#probeAnno<-posToProbeAnno("121101_OysterV9_MG_meth.pos", genome="Pacific Oyster", microarrayPlatform="NimbleGen 121101 Oyster V9 MG meth")
#save(probeAnno,file="121101_OysterV9_MG_meth.ProbeAnnotation.rda")
#load annotation object
load("121101_OysterV9_MG_meth.ProbeAnnotation.rda")

#load gff
gff<-read.delim("oyster.v9.glean.final.rename.mRNA.gff", header=F)
#scaffold1702 GLEAN mRNA   12635   17265 1.0000000  -  . ID=CGI_10001261;
names(gff)<-c("chr","source","type","start","end","score","strand","phase","name")

#read in altered targets file such that the control samlpe is in the denominator slot (Cy3)
todaysDate<-substr(format(Sys.time(), "%Y.%m.%d."),1,11)
#RG<-readNimblegen("2013.09.24.targets.txt", "spottypes.txt", headerPattern="# software=DEVA", path="pair")
#save(RG, file="2013.10.03.RG.rda")
load("2013.10.03.RG.rda")
RG<-RG[RG$gene$Status != "RANDOM",]

#Preprocessing
NG<-preprocess(RG,method="nimblegen")
sampleNames(NG) <-with(RG$targets, paste(FileNameCy5,"vs",FileNameCy3,sep="_"))

#ProbeIDs all have ":+:" in their names, which has been converted to "..." when the preprocess function is run.  This is in conflict
#with the probeAnno
row.names(exprs(NG))<-gsub("...",":+:",row.names(exprs(NG)), fixed=TRUE)

#smooth probe intensities
smoothNG<-computeRunningMedians(NG,probeAnno=probeAnno, modColumn="FileNameCy5", winHalfSize=500)
combinedNG<-combine(NG,smoothNG)

y0 <- apply(exprs(smoothNG),2,upperBoundNull)
# A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm     A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm 
# 0.6742440                                            0.6978518 
# A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm 
# 0.7904904 

chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(y0[1],y0[2],y0[3]), distCutOff=600)
chersNG <- relateChers(chersNG, gff)
chersNGD <- as.data.frame.cherList(chersNG)
chersNGD<-chersNGD[order(chersNGD$maxLevel, decreasing=TRUE),]
nrow(chersNGD) #2670
#chers detected within each array
nrow(chersNGD[grep("A01",chersNGD$name),]) #1223 
nrow(chersNGD[grep("A02",chersNGD$name),]) #834 
nrow(chersNGD[grep("A03",chersNGD$name),]) #613

#try to write wig using the writeWIG function from Starr
library(Starr)
writeWIG(smoothNG[,1],probeAnno,file="2013.09.25.A01_smoothed.wig")
writeWIG(smoothNG[,2],probeAnno,file="2013.09.25.A02_smoothed.wig")
writeWIG(smoothNG[,3],probeAnno,file="2013.09.25.A03_smoothed.wig")
#had to change NAs to 0s in order to load into IGV


#find overlapping regions
library("ChIPpeakAnno")
#create ranged data objects
chersA01 <- findChersOnSmoothed(smoothNG[,1], probeAnno=probeAnno, thresholds=c(y0[1]), distCutOff=600)
chersA01 <- relateChers(chersA01, gff)
exportCherList(chersA01,filename="2013.09.25.chers_A01.bed",format="bed",genome="oyster")
bedA01<-read.delim("2013.09.25.chers_A01.bed", header=F)
bedA01$V6<-1
rangedA01<-BED2RangedData(bedA01)

chersA02 <- findChersOnSmoothed(smoothNG[,2], probeAnno=probeAnno, thresholds=c(y0[2]), distCutOff=600)
chersA02<- relateChers(chersA02, gff)
exportCherList(chersA02,filename="2013.09.25.chers_A02.bed",format="bed",genome="oyster")
bedA02<-read.delim("2013.09.25.chers_A02.bed", header=F)
bedA02$V6<-1
rangedA02<-BED2RangedData(bedA02)

chersA03 <- findChersOnSmoothed(smoothNG[,3], probeAnno=probeAnno, thresholds=c(y0[3]), distCutOff=600)
chersA03<- relateChers(chersA03, gff)
exportCherList(chersA03,filename="2013.09.25.chers_A03.bed",format="bed",genome="oyster")
bedA03<-read.delim("2013.09.25.chers_A03.bed", header=F)
bedA03$V6<-1
rangedA03<-BED2RangedData(bedA03)

#generate objects that contain overlapping regions
overlapA01.A02<-findOverlappingPeaks(rangedA01,rangedA02)
overlapA01.A03<-findOverlappingPeaks(rangedA01,rangedA03)
overlapA02.A03<-findOverlappingPeaks(rangedA02,rangedA03)

#number of overlapping peaks in each pairwise comparison
nrow(overlapA01.A02$OverlappingPeaks) #584
nrow(overlapA01.A03$OverlappingPeaks) #460
nrow(overlapA02.A03$OverlappingPeaks) #505


nrow(rangedA02) #834
A02peaksWithOverlapA03<-overlapA02.A03$Peaks1withOverlaps; nrow(A02peaksWithOverlapA03) #478
A02noOverlapWithA03<-rangedA02[!rownames(rangedA02) %in% rownames(A02peaksWithOverlapA03),]; nrow(A02noOverlapWithA03) #356

nrow(rangedA03) #613
A03peaksWithOverlapA02<-overlapA02.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA02) #500
A03noOverlapWithA02<-rangedA03[!rownames(rangedA03) %in% rownames(A03peaksWithOverlapA02),]; nrow(A03noOverlapWithA02) #113

#we now have overlapping and nonoverlapping chers between A02 and A03, what we want now, is to remove those chers
#that also overlap with A01 from each set

A02peaksWithOverlapA01<-overlapA01.A02$Peaks2withOverlaps; nrow(A02peaksWithOverlapA01) #575
A02peaksWithOverlapA03butNotA01<-A02peaksWithOverlapA03[!rownames(A02peaksWithOverlapA03) %in% rownames(A02peaksWithOverlapA01),]; nrow(A02peaksWithOverlapA03butNotA01) #96

A03peaksWithOverlapA01<-overlapA01.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA01) #456
A03peaksWithOverlapA02butNotA01<-A03peaksWithOverlapA02[!rownames(A03peaksWithOverlapA02) %in% rownames(A03peaksWithOverlapA01),]; nrow(A03peaksWithOverlapA02butNotA01) #99

#write RangedData to bed
#https://stat.ethz.ch/pipermail/bioconductor/2012-April/045050.html
write.table(as.data.frame(A02peaksWithOverlapA03butNotA01)[,c(1,2,3)], file="2013.09.27.A02overlapA03_butNotA01.bed",sep="\t",quote=F,row.names=F,col.names=F)
write.table(as.data.frame(A03peaksWithOverlapA02butNotA01)[,c(1,2,3)], file="2013.09.27.A03overlapA02_butNotA01.bed",sep="\t",quote=F,row.names=F,col.names=F)