# On Sep 17, 2013, at 4:01 PM, Mackenzie Gavery wrote:
# 
# > Hi Jeff,
# >
# > Thanks for meeting with us.  Here is a recap of the design of the array that we discussed today.
# >
# > There were 3 competitive arrays performed:
# > A01. Input DNA from estrogen treated sample (pooled DNA) versus Input DNA from control sample (pooled DNA)
# > A02. Methylation Enriched estrogen treated sample versus Methylation Enriched control sample
# > A03. Dye swap of the methylation enriched samples in array A02
# >
# > The genome information for the Pacific oyster (scaffolds and gene location) can be found here: 
# > FASTA file (11969 scaffolds)
# > annotation of gene location on scaffolds as gff
# >
# > Thanks also for looking into how many arrays we have remaining.  Please let me know if you need anything else or have any questions.
# >
# > Best,
# >
# > Mackenzie
#

library(Ringo)
#generate annotation object
#probeAnno<-posToProbeAnno("121101_OysterV9_MG_meth.pos", genome="Pacific Oyster", microarrayPlatform="NimbleGen 121101 Oyster V9 MG meth")
#save(probeAnno,file="121101_OysterV9_MG_meth.ProbeAnnotation.rda")
#load annotation object
load("2013.10.03.mgavery_RcodeSentOver/121101_OysterV9_MG_meth.ProbeAnnotation.rda")

#load gff
gff<-read.delim("oyster.v9.glean.final.rename.mRNA.gff", header=F)
#scaffold1702 GLEAN mRNA   12635   17265 1.0000000  -  . ID=CGI_10001261;
names(gff)<-c("chr","source","type","start","end","score","strand","phase","name")

#read in altered targets file such that the control samlpe is in the denominator slot (Cy3)
todaysDate<-substr(format(Sys.time(), "%Y.%m.%d."),1,11)
#RG<-readNimblegen("2013.09.24.targets.txt", "spottypes.txt", headerPattern="# software=DEVA", path="pair")
#save(RG, file="2013.10.03.RG.rda")
load("2013.10.03.mgavery_RcodeSentOver/2013.10.03.RG.rda")
RG<-RG[RG$gene$Status != "RANDOM",]

#Preprocessing
NG<-preprocess(RG,method="loess")
sampleNames(NG) <-with(RG$targets, paste(FileNameCy5,"vs",FileNameCy3,sep="_"))

#ProbeIDs all have ":+:" in their names, which has been converted to "..." when the preprocess function is run.  This is in conflict
#with the probeAnno
row.names(exprs(NG))<-gsub("...",":+:",row.names(exprs(NG)), fixed=TRUE)

#smooth probe intensities
# smoothNG<-computeRunningMedians(NG,probeAnno=probeAnno, modColumn="FileNameCy5", winHalfSize=500)
smoothNG<-computeRunningMedians(NG,probeAnno=probeAnno, modColumn="FileNameCy5", winHalfSize=300)
#combinedNG<-combine(NG,smoothNG)

#flip the ratios on these such that we're looking for hypomethylated regions
#exprs(smoothNG) = -exprs(smoothNG)

#y0 <- apply(exprs(smoothNG),2,upperBoundNull)
# A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm     A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm     A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm 
# 0.7642796                                            0.7646871                                            0.7750173 

#try using .585 for the control and 1 for the experimental samples
quantile(exprs(smoothNG[,1]), na.rm=TRUE, probs=c(.5,.75,.9,.95,.98,.99))
quantile(exprs(smoothNG[,2]), na.rm=TRUE, probs=c(.5,.75,.9,.95,.98,.99))
quantile(exprs(smoothNG[,3]), na.rm=TRUE, probs=c(.5,.75,.9,.95,.98,.99))

#try 1.4 fold for the control: 0.485
y0<-c(.485,1,1)



# #histograms of smoothed intensities
# png(file="2013.11.21.mgaveryHypermethylationThresholds.png", width=1800, height=600)
# par(mfrow=c(1,3))
# hist(exprs(smoothNG[,1]), breaks=100); abline(v=y0[1], col="red")
# hist(exprs(smoothNG[,2]), breaks=100); abline(v=y0[2], col="red")
# hist(exprs(smoothNG[,3]), breaks=100); abline(v=y0[3], col="red")
# dev.off()
# 
# png(file="2013.11.21.mgaveryBoxPlots.png", width=1800, height=600)
# par(mfrow=c(1,3))
# boxplot(exprs(smoothNG[,1]), main="A01", ylim=c(-4,4))
# boxplot(exprs(smoothNG[,2]), main="A02", ylim=c(-4,4))
# boxplot(exprs(smoothNG[,3]), main="A03", ylim=c(-4,4))
# dev.off()



chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(y0[1],y0[2],y0[3]), distCutOff=600)
#chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(test0[1],test0[2],test0[3]), distCutOff=600)
chersNG <- relateChers(chersNG, gff)
chersNGD <- as.data.frame.cherList(chersNG)
chersNGD<-chersNGD[order(chersNGD$maxLevel, decreasing=TRUE),]

nrow(chersNGD) #4012
#chers detected within each array
nrow(chersNGD[grep("A01",chersNGD$name),]) #3036 
nrow(chersNGD[grep("A02",chersNGD$name),]) #448
nrow(chersNGD[grep("A03",chersNGD$name),]) #528


#try to write wig using the writeWIG function from Starr
# library(Starr)
# writeWIG(smoothNG[,1],probeAnno,file="2013.11.21.A01_smoothed.wig")
# writeWIG(smoothNG[,2],probeAnno,file="2013.11.21.A02_smoothed.wig")
# writeWIG(smoothNG[,3],probeAnno,file="2013.11.21.A03_smoothed.wig")
#had to change NAs to 0s in order to load into IGV

#y0<-test0

#find overlapping regions
library("ChIPpeakAnno")
#create ranged data objects
chersA01 <- findChersOnSmoothed(smoothNG[,1], probeAnno=probeAnno, thresholds=c(y0[1]), distCutOff=600)
chersA01 <- relateChers(chersA01, gff)
exportCherList(chersA01,filename="2013.11.21.chers_A01_HYPER.bed",format="bed",genome="oyster")
bedA01<-read.delim("2013.11.21.chers_A01_HYPER.bed", header=F)
bedA01$V6<-1
rangedA01<-BED2RangedData(bedA01)

chersA02 <- findChersOnSmoothed(smoothNG[,2], probeAnno=probeAnno, thresholds=c(y0[2]), distCutOff=600)
chersA02<- relateChers(chersA02, gff)
exportCherList(chersA02,filename="2013.11.21.chers_A02_HYPER.bed",format="bed",genome="oyster")
bedA02<-read.delim("2013.11.21.chers_A02_HYPER.bed", header=F)
bedA02$V6<-1
rangedA02<-BED2RangedData(bedA02)

chersA03 <- findChersOnSmoothed(smoothNG[,3], probeAnno=probeAnno, thresholds=c(y0[3]), distCutOff=600)
chersA03<- relateChers(chersA03, gff)
exportCherList(chersA03,filename="2013.11.21.chers_A03_HYPER.bed",format="bed",genome="oyster")
bedA03<-read.delim("2013.11.21.chers_A03_HYPER.bed", header=F)
bedA03$V6<-1
rangedA03<-BED2RangedData(bedA03)

#generate objects that contain overlapping regions
overlapA01.A02<-findOverlappingPeaks(rangedA01,rangedA02)
overlapA01.A03<-findOverlappingPeaks(rangedA01,rangedA03)
overlapA02.A03<-findOverlappingPeaks(rangedA02,rangedA03)


#using thresholds from hypermethylation
#number of overlapping peaks in each pairwise comparison
nrow(overlapA01.A02$OverlappingPeaks) #525
nrow(overlapA01.A03$OverlappingPeaks) #553
nrow(overlapA02.A03$OverlappingPeaks) #624


nrow(rangedA02) #720
A02peaksWithOverlapA03<-overlapA02.A03$Peaks1withOverlaps; nrow(A02peaksWithOverlapA03) #493
A02noOverlapWithA03<-rangedA02[!rownames(rangedA02) %in% rownames(A02peaksWithOverlapA03),]; nrow(A02noOverlapWithA03) #227

nrow(rangedA03) #718
A03peaksWithOverlapA02<-overlapA02.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA02) #491
A03noOverlapWithA02<-rangedA03[!rownames(rangedA03) %in% rownames(A03peaksWithOverlapA02),]; nrow(A03noOverlapWithA02) #227

#we now have overlapping and nonoverlapping chers between A02 and A03, what we want now, is to remove those chers
#that also overlap with A01 from each set

A02peaksWithOverlapA01<-overlapA01.A02$Peaks2withOverlaps; nrow(A02peaksWithOverlapA01) #410
A02peaksWithOverlapA03butNotA01<-A02peaksWithOverlapA03[!rownames(A02peaksWithOverlapA03) %in% rownames(A02peaksWithOverlapA01),]; nrow(A02peaksWithOverlapA03butNotA01) #160

A03peaksWithOverlapA01<-overlapA01.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA01) #390
A03peaksWithOverlapA02butNotA01<-A03peaksWithOverlapA02[!rownames(A03peaksWithOverlapA02) %in% rownames(A03peaksWithOverlapA01),]; nrow(A03peaksWithOverlapA02butNotA01) #162

#write RangedData to bed
#https://stat.ethz.ch/pipermail/bioconductor/2012-April/045050.html
write.table(as.data.frame(A02peaksWithOverlapA03butNotA01)[,c(1,2,3)], file="2013.11.21.A02overlapA03_butNotA01_HYPER.bed",sep="\t",quote=F,row.names=F,col.names=F)
write.table(as.data.frame(A03peaksWithOverlapA02butNotA01)[,c(1,2,3)], file="2013.11.21.A03overlapA02_butNotA01_HYPER.bed",sep="\t",quote=F,row.names=F,col.names=F)


#generate tracks of signal intensities to view in IGV
# anno<-read.delim("121101_OysterV9_MG_meth.pos"); nrow(anno) #697752
# anno.wig<-data.frame(anno[1],anno[3:4],anno[4]+54)
# 
# fnames<-dir("pair", pattern="*.pair")
# for(f in fnames){
#   setwd("pair")
#   x<-read.delim(f, skip=1); setwd("..")
#   x<-data.frame(x[4],x[10])
#   x<-merge(anno.wig,x,by="PROBE_ID"); nrow(x)
#   x<-data.frame(x[2:5])
#   x$PM<-log2(x$PM)
#   resultName<-gsub(".pair",".wig",f)
#   write.table(x,file=resultName,sep="\t",quote=F,row.names=F,col.names=F)
# }
# 
# #draw a loess line through the data
# MA<-normalizeWithinArrays(RG,method="median")
# png(file="2013.11.19.mgaveryMAplots.png",width=1800, height=600)
# par(mfrow=c(1,3))
# scatter.smooth(MA$A[,1], MA$M[,1], pch=".", col="grey",ylim=c(-6,6), main="A01")
# scatter.smooth(MA$A[,2], MA$M[,2], pch=".", col="grey",ylim=c(-6,6), main="A02")
# scatter.smooth(MA$A[,2], MA$M[,3], pch=".", col="grey",ylim=c(-6,6), main="A03")
# dev.off()


######################################################################################################################
# Hypomethylation

#flip the ratios on these such that we're looking for hypomethylated regions
exprs(smoothNG) = -exprs(smoothNG)

#y0 <- apply(exprs(smoothNG),2,upperBoundNull)
# A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm     A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm 
# 0.7979540                                            0.7145242 
# A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm 
# 0.7663080 

#from hypermethylated data
# A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm     A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm 
# 0.6946110                                            0.6974419 
# A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm 
# 0.7191573 

#use y0 values from hypermethylated data
#y0<-c(0.6946110,0.6974419,0.7191573)


chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(y0[1],y0[2],y0[3]), distCutOff=600)
#chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(test0[1],test0[2],test0[3]), distCutOff=600)
chersNG <- relateChers(chersNG, gff)
chersNGD <- as.data.frame.cherList(chersNG)
chersNGD<-chersNGD[order(chersNGD$maxLevel, decreasing=TRUE),]

nrow(chersNGD) #3518
#chers detected within each array
nrow(chersNGD[grep("A01",chersNGD$name),]) #2758
nrow(chersNGD[grep("A02",chersNGD$name),]) #348
nrow(chersNGD[grep("A03",chersNGD$name),]) #412


#find overlapping regions
library("ChIPpeakAnno")
#create ranged data objects
chersA01 <- findChersOnSmoothed(smoothNG[,1], probeAnno=probeAnno, thresholds=c(y0[1]), distCutOff=600)
chersA01 <- relateChers(chersA01, gff)
exportCherList(chersA01,filename="2013.11.21.chers_A01_HYPO.bed",format="bed",genome="oyster")
bedA01<-read.delim("2013.11.21.chers_A01_HYPO.bed", header=F)
bedA01$V6<-1
rangedA01<-BED2RangedData(bedA01)

chersA02 <- findChersOnSmoothed(smoothNG[,2], probeAnno=probeAnno, thresholds=c(y0[2]), distCutOff=600)
chersA02<- relateChers(chersA02, gff)
exportCherList(chersA02,filename="2013.11.21.chers_A02_HYPO.bed",format="bed",genome="oyster")
bedA02<-read.delim("2013.11.21.chers_A02_HYPO.bed", header=F)
bedA02$V6<-1
rangedA02<-BED2RangedData(bedA02)

chersA03 <- findChersOnSmoothed(smoothNG[,3], probeAnno=probeAnno, thresholds=c(y0[3]), distCutOff=600)
chersA03<- relateChers(chersA03, gff)
exportCherList(chersA03,filename="2013.11.21.chers_A03_HYPO.bed",format="bed",genome="oyster")
bedA03<-read.delim("2013.11.21.chers_A03_HYPO.bed", header=F)
bedA03$V6<-1
rangedA03<-BED2RangedData(bedA03)

#generate objects that contain overlapping regions
overlapA01.A02<-findOverlappingPeaks(rangedA01,rangedA02)
overlapA01.A03<-findOverlappingPeaks(rangedA01,rangedA03)
overlapA02.A03<-findOverlappingPeaks(rangedA02,rangedA03)


#using thresholds from hypermethylation
#number of overlapping peaks in each pairwise comparison
nrow(overlapA01.A02$OverlappingPeaks) #438
nrow(overlapA01.A03$OverlappingPeaks) #421
nrow(overlapA02.A03$OverlappingPeaks) #517

nrow(rangedA02) #720
A02peaksWithOverlapA03<-overlapA02.A03$Peaks1withOverlaps; nrow(A02peaksWithOverlapA03) #493
A02noOverlapWithA03<-rangedA02[!rownames(rangedA02) %in% rownames(A02peaksWithOverlapA03),]; nrow(A02noOverlapWithA03) #227

nrow(rangedA03) #718
A03peaksWithOverlapA02<-overlapA02.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA02) #491
A03noOverlapWithA02<-rangedA03[!rownames(rangedA03) %in% rownames(A03peaksWithOverlapA02),]; nrow(A03noOverlapWithA02) #227

#we now have overlapping and nonoverlapping chers between A02 and A03, what we want now, is to remove those chers
#that also overlap with A01 from each set

A02peaksWithOverlapA01<-overlapA01.A02$Peaks2withOverlaps; nrow(A02peaksWithOverlapA01) #410
A02peaksWithOverlapA03butNotA01<-A02peaksWithOverlapA03[!rownames(A02peaksWithOverlapA03) %in% rownames(A02peaksWithOverlapA01),]; nrow(A02peaksWithOverlapA03butNotA01) #160

A03peaksWithOverlapA01<-overlapA01.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA01) #390
A03peaksWithOverlapA02butNotA01<-A03peaksWithOverlapA02[!rownames(A03peaksWithOverlapA02) %in% rownames(A03peaksWithOverlapA01),]; nrow(A03peaksWithOverlapA02butNotA01) #162

#write RangedData to bed
#https://stat.ethz.ch/pipermail/bioconductor/2012-April/045050.html
write.table(as.data.frame(A02peaksWithOverlapA03butNotA01)[,c(1,2,3)], file="2013.11.21.A02overlapA03_butNotA01_HYPO.bed",sep="\t",quote=F,row.names=F,col.names=F)
write.table(as.data.frame(A03peaksWithOverlapA02butNotA01)[,c(1,2,3)], file="2013.11.21.A03overlapA02_butNotA01_HYPO.bed",sep="\t",quote=F,row.names=F,col.names=F)


#what does the distribution of the RANDOM negative control probes look like?
rg<-readNimblegen("2013.09.24.targets.txt", "spottypes.txt", headerPattern="# software=DEVA", path="pair")

#draw a loess line through the data
MA<-normalizeWithinArrays(rg,method="none")
MA.random<-MA[MA$genes$GENE_EXPR_OPTION == "RANDOM",]; nrow(MA.random) #23835
png(file="2013.11.20.mgaveryMAplots_RANDOM.png",width=1800, height=600)
par(mfrow=c(1,3))
scatter.smooth(MA.random$A[,1], MA.random$M[,1], pch=".", col="grey",ylim=c(-6,6), main="A01")
scatter.smooth(MA.random$A[,2], MA.random$M[,2], pch=".", col="grey",ylim=c(-6,6), main="A02")
scatter.smooth(MA.random$A[,2], MA.random$M[,3], pch=".", col="grey",ylim=c(-6,6), main="A03")
dev.off()

png(file="2013.11.20.mgaveryMAplots.png",width=1800, height=600)
par(mfrow=c(1,3))
plotMA(MA,array=1)
plotMA(MA,array=2)
plotMA(MA,array=3)
dev.off()


MA<-normalizeWithinArrays(RG,method="none")
png(file="2013.11.20.mgaveryBoxPlots.png", width=1200, height=600)
par(mfrow=c(1,2))
boxplot(MA$A, main="Sample Probes", ylab="A")
boxplot(MA.random$A, main="RANDOM Control Probes", ylab="A")
dev.off()

png(file="2013.11.20.mgaveryMAplots_noRANDOM.png",width=1800, height=600)
par(mfrow=c(1,3))
scatter.smooth(MA$A[,1], MA$M[,1], pch=".", col="grey",ylim=c(-6,6), main="A01")
scatter.smooth(MA$A[,2], MA$M[,2], pch=".", col="grey",ylim=c(-6,6), main="A02")
scatter.smooth(MA$A[,2], MA$M[,3], pch=".", col="grey",ylim=c(-6,6), main="A03")
dev.off()