# On Sep 17, 2013, at 4:01 PM, Mackenzie Gavery wrote: # # > Hi Jeff, # > # > Thanks for meeting with us. Here is a recap of the design of the array that we discussed today. # > # > There were 3 competitive arrays performed: # > A01. Input DNA from estrogen treated sample (pooled DNA) versus Input DNA from control sample (pooled DNA) # > A02. Methylation Enriched estrogen treated sample versus Methylation Enriched control sample # > A03. Dye swap of the methylation enriched samples in array A02 # > # > The genome information for the Pacific oyster (scaffolds and gene location) can be found here: # > FASTA file (11969 scaffolds) # > annotation of gene location on scaffolds as gff # > # > Thanks also for looking into how many arrays we have remaining. Please let me know if you need anything else or have any questions. # > # > Best, # > # > Mackenzie # library(Ringo) #generate annotation object #probeAnno<-posToProbeAnno("121101_OysterV9_MG_meth.pos", genome="Pacific Oyster", microarrayPlatform="NimbleGen 121101 Oyster V9 MG meth") #save(probeAnno,file="121101_OysterV9_MG_meth.ProbeAnnotation.rda") #load annotation object load("2013.10.03.mgavery_RcodeSentOver/121101_OysterV9_MG_meth.ProbeAnnotation.rda") #load gff gff<-read.delim("oyster.v9.glean.final.rename.mRNA.gff", header=F) #scaffold1702 GLEAN mRNA 12635 17265 1.0000000 - . ID=CGI_10001261; names(gff)<-c("chr","source","type","start","end","score","strand","phase","name") #read in altered targets file such that the control samlpe is in the denominator slot (Cy3) todaysDate<-substr(format(Sys.time(), "%Y.%m.%d."),1,11) #RG<-readNimblegen("2013.09.24.targets.txt", "spottypes.txt", headerPattern="# software=DEVA", path="pair") #save(RG, file="2013.10.03.RG.rda") load("2013.10.03.mgavery_RcodeSentOver/2013.10.03.RG.rda") RG<-RG[RG$gene$Status != "RANDOM",] #Preprocessing NG<-preprocess(RG,method="loess") sampleNames(NG) <-with(RG$targets, paste(FileNameCy5,"vs",FileNameCy3,sep="_")) #ProbeIDs all have ":+:" in their names, which has been converted to "..." when the preprocess function is run. This is in conflict #with the probeAnno row.names(exprs(NG))<-gsub("...",":+:",row.names(exprs(NG)), fixed=TRUE) #smooth probe intensities # smoothNG<-computeRunningMedians(NG,probeAnno=probeAnno, modColumn="FileNameCy5", winHalfSize=500) smoothNG<-computeRunningMedians(NG,probeAnno=probeAnno, modColumn="FileNameCy5", winHalfSize=300) #combinedNG<-combine(NG,smoothNG) #flip the ratios on these such that we're looking for hypomethylated regions #exprs(smoothNG) = -exprs(smoothNG) #y0 <- apply(exprs(smoothNG),2,upperBoundNull) # A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm # 0.7642796 0.7646871 0.7750173 #try using .585 for the control and 1 for the experimental samples quantile(exprs(smoothNG[,1]), na.rm=TRUE, probs=c(.5,.75,.9,.95,.98,.99)) quantile(exprs(smoothNG[,2]), na.rm=TRUE, probs=c(.5,.75,.9,.95,.98,.99)) quantile(exprs(smoothNG[,3]), na.rm=TRUE, probs=c(.5,.75,.9,.95,.98,.99)) #try 1.4 fold for the control: 0.485 y0<-c(.485,1,1) # #histograms of smoothed intensities # png(file="2013.11.21.mgaveryHypermethylationThresholds.png", width=1800, height=600) # par(mfrow=c(1,3)) # hist(exprs(smoothNG[,1]), breaks=100); abline(v=y0[1], col="red") # hist(exprs(smoothNG[,2]), breaks=100); abline(v=y0[2], col="red") # hist(exprs(smoothNG[,3]), breaks=100); abline(v=y0[3], col="red") # dev.off() # # png(file="2013.11.21.mgaveryBoxPlots.png", width=1800, height=600) # par(mfrow=c(1,3)) # boxplot(exprs(smoothNG[,1]), main="A01", ylim=c(-4,4)) # boxplot(exprs(smoothNG[,2]), main="A02", ylim=c(-4,4)) # boxplot(exprs(smoothNG[,3]), main="A03", ylim=c(-4,4)) # dev.off() chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(y0[1],y0[2],y0[3]), distCutOff=600) #chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(test0[1],test0[2],test0[3]), distCutOff=600) chersNG <- relateChers(chersNG, gff) chersNGD <- as.data.frame.cherList(chersNG) chersNGD<-chersNGD[order(chersNGD$maxLevel, decreasing=TRUE),] nrow(chersNGD) #4012 #chers detected within each array nrow(chersNGD[grep("A01",chersNGD$name),]) #3036 nrow(chersNGD[grep("A02",chersNGD$name),]) #448 nrow(chersNGD[grep("A03",chersNGD$name),]) #528 #try to write wig using the writeWIG function from Starr # library(Starr) # writeWIG(smoothNG[,1],probeAnno,file="2013.11.21.A01_smoothed.wig") # writeWIG(smoothNG[,2],probeAnno,file="2013.11.21.A02_smoothed.wig") # writeWIG(smoothNG[,3],probeAnno,file="2013.11.21.A03_smoothed.wig") #had to change NAs to 0s in order to load into IGV #y0<-test0 #find overlapping regions library("ChIPpeakAnno") #create ranged data objects chersA01 <- findChersOnSmoothed(smoothNG[,1], probeAnno=probeAnno, thresholds=c(y0[1]), distCutOff=600) chersA01 <- relateChers(chersA01, gff) exportCherList(chersA01,filename="2013.11.21.chers_A01_HYPER.bed",format="bed",genome="oyster") bedA01<-read.delim("2013.11.21.chers_A01_HYPER.bed", header=F) bedA01$V6<-1 rangedA01<-BED2RangedData(bedA01) chersA02 <- findChersOnSmoothed(smoothNG[,2], probeAnno=probeAnno, thresholds=c(y0[2]), distCutOff=600) chersA02<- relateChers(chersA02, gff) exportCherList(chersA02,filename="2013.11.21.chers_A02_HYPER.bed",format="bed",genome="oyster") bedA02<-read.delim("2013.11.21.chers_A02_HYPER.bed", header=F) bedA02$V6<-1 rangedA02<-BED2RangedData(bedA02) chersA03 <- findChersOnSmoothed(smoothNG[,3], probeAnno=probeAnno, thresholds=c(y0[3]), distCutOff=600) chersA03<- relateChers(chersA03, gff) exportCherList(chersA03,filename="2013.11.21.chers_A03_HYPER.bed",format="bed",genome="oyster") bedA03<-read.delim("2013.11.21.chers_A03_HYPER.bed", header=F) bedA03$V6<-1 rangedA03<-BED2RangedData(bedA03) #generate objects that contain overlapping regions overlapA01.A02<-findOverlappingPeaks(rangedA01,rangedA02) overlapA01.A03<-findOverlappingPeaks(rangedA01,rangedA03) overlapA02.A03<-findOverlappingPeaks(rangedA02,rangedA03) #using thresholds from hypermethylation #number of overlapping peaks in each pairwise comparison nrow(overlapA01.A02$OverlappingPeaks) #525 nrow(overlapA01.A03$OverlappingPeaks) #553 nrow(overlapA02.A03$OverlappingPeaks) #624 nrow(rangedA02) #720 A02peaksWithOverlapA03<-overlapA02.A03$Peaks1withOverlaps; nrow(A02peaksWithOverlapA03) #493 A02noOverlapWithA03<-rangedA02[!rownames(rangedA02) %in% rownames(A02peaksWithOverlapA03),]; nrow(A02noOverlapWithA03) #227 nrow(rangedA03) #718 A03peaksWithOverlapA02<-overlapA02.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA02) #491 A03noOverlapWithA02<-rangedA03[!rownames(rangedA03) %in% rownames(A03peaksWithOverlapA02),]; nrow(A03noOverlapWithA02) #227 #we now have overlapping and nonoverlapping chers between A02 and A03, what we want now, is to remove those chers #that also overlap with A01 from each set A02peaksWithOverlapA01<-overlapA01.A02$Peaks2withOverlaps; nrow(A02peaksWithOverlapA01) #410 A02peaksWithOverlapA03butNotA01<-A02peaksWithOverlapA03[!rownames(A02peaksWithOverlapA03) %in% rownames(A02peaksWithOverlapA01),]; nrow(A02peaksWithOverlapA03butNotA01) #160 A03peaksWithOverlapA01<-overlapA01.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA01) #390 A03peaksWithOverlapA02butNotA01<-A03peaksWithOverlapA02[!rownames(A03peaksWithOverlapA02) %in% rownames(A03peaksWithOverlapA01),]; nrow(A03peaksWithOverlapA02butNotA01) #162 #write RangedData to bed #https://stat.ethz.ch/pipermail/bioconductor/2012-April/045050.html write.table(as.data.frame(A02peaksWithOverlapA03butNotA01)[,c(1,2,3)], file="2013.11.21.A02overlapA03_butNotA01_HYPER.bed",sep="\t",quote=F,row.names=F,col.names=F) write.table(as.data.frame(A03peaksWithOverlapA02butNotA01)[,c(1,2,3)], file="2013.11.21.A03overlapA02_butNotA01_HYPER.bed",sep="\t",quote=F,row.names=F,col.names=F) #generate tracks of signal intensities to view in IGV # anno<-read.delim("121101_OysterV9_MG_meth.pos"); nrow(anno) #697752 # anno.wig<-data.frame(anno[1],anno[3:4],anno[4]+54) # # fnames<-dir("pair", pattern="*.pair") # for(f in fnames){ # setwd("pair") # x<-read.delim(f, skip=1); setwd("..") # x<-data.frame(x[4],x[10]) # x<-merge(anno.wig,x,by="PROBE_ID"); nrow(x) # x<-data.frame(x[2:5]) # x$PM<-log2(x$PM) # resultName<-gsub(".pair",".wig",f) # write.table(x,file=resultName,sep="\t",quote=F,row.names=F,col.names=F) # } # # #draw a loess line through the data # MA<-normalizeWithinArrays(RG,method="median") # png(file="2013.11.19.mgaveryMAplots.png",width=1800, height=600) # par(mfrow=c(1,3)) # scatter.smooth(MA$A[,1], MA$M[,1], pch=".", col="grey",ylim=c(-6,6), main="A01") # scatter.smooth(MA$A[,2], MA$M[,2], pch=".", col="grey",ylim=c(-6,6), main="A02") # scatter.smooth(MA$A[,2], MA$M[,3], pch=".", col="grey",ylim=c(-6,6), main="A03") # dev.off() ###################################################################################################################### # Hypomethylation #flip the ratios on these such that we're looking for hypomethylated regions exprs(smoothNG) = -exprs(smoothNG) #y0 <- apply(exprs(smoothNG),2,upperBoundNull) # A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm # 0.7979540 0.7145242 # A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm # 0.7663080 #from hypermethylated data # A01_EE2.input_532.pair_vs_A01_Ctrl.input_635.pair.sm A02_EE2.MBD_532.pair_vs_A02_Ctrl.MBD_635.pair.sm # 0.6946110 0.6974419 # A03_EE2.MBD_635.pair_vs_A03_Ctrl.MBD_532.pair.sm # 0.7191573 #use y0 values from hypermethylated data #y0<-c(0.6946110,0.6974419,0.7191573) chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(y0[1],y0[2],y0[3]), distCutOff=600) #chersNG <- findChersOnSmoothed(smoothNG, probeAnno=probeAnno, thresholds=c(test0[1],test0[2],test0[3]), distCutOff=600) chersNG <- relateChers(chersNG, gff) chersNGD <- as.data.frame.cherList(chersNG) chersNGD<-chersNGD[order(chersNGD$maxLevel, decreasing=TRUE),] nrow(chersNGD) #3518 #chers detected within each array nrow(chersNGD[grep("A01",chersNGD$name),]) #2758 nrow(chersNGD[grep("A02",chersNGD$name),]) #348 nrow(chersNGD[grep("A03",chersNGD$name),]) #412 #find overlapping regions library("ChIPpeakAnno") #create ranged data objects chersA01 <- findChersOnSmoothed(smoothNG[,1], probeAnno=probeAnno, thresholds=c(y0[1]), distCutOff=600) chersA01 <- relateChers(chersA01, gff) exportCherList(chersA01,filename="2013.11.21.chers_A01_HYPO.bed",format="bed",genome="oyster") bedA01<-read.delim("2013.11.21.chers_A01_HYPO.bed", header=F) bedA01$V6<-1 rangedA01<-BED2RangedData(bedA01) chersA02 <- findChersOnSmoothed(smoothNG[,2], probeAnno=probeAnno, thresholds=c(y0[2]), distCutOff=600) chersA02<- relateChers(chersA02, gff) exportCherList(chersA02,filename="2013.11.21.chers_A02_HYPO.bed",format="bed",genome="oyster") bedA02<-read.delim("2013.11.21.chers_A02_HYPO.bed", header=F) bedA02$V6<-1 rangedA02<-BED2RangedData(bedA02) chersA03 <- findChersOnSmoothed(smoothNG[,3], probeAnno=probeAnno, thresholds=c(y0[3]), distCutOff=600) chersA03<- relateChers(chersA03, gff) exportCherList(chersA03,filename="2013.11.21.chers_A03_HYPO.bed",format="bed",genome="oyster") bedA03<-read.delim("2013.11.21.chers_A03_HYPO.bed", header=F) bedA03$V6<-1 rangedA03<-BED2RangedData(bedA03) #generate objects that contain overlapping regions overlapA01.A02<-findOverlappingPeaks(rangedA01,rangedA02) overlapA01.A03<-findOverlappingPeaks(rangedA01,rangedA03) overlapA02.A03<-findOverlappingPeaks(rangedA02,rangedA03) #using thresholds from hypermethylation #number of overlapping peaks in each pairwise comparison nrow(overlapA01.A02$OverlappingPeaks) #438 nrow(overlapA01.A03$OverlappingPeaks) #421 nrow(overlapA02.A03$OverlappingPeaks) #517 nrow(rangedA02) #720 A02peaksWithOverlapA03<-overlapA02.A03$Peaks1withOverlaps; nrow(A02peaksWithOverlapA03) #493 A02noOverlapWithA03<-rangedA02[!rownames(rangedA02) %in% rownames(A02peaksWithOverlapA03),]; nrow(A02noOverlapWithA03) #227 nrow(rangedA03) #718 A03peaksWithOverlapA02<-overlapA02.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA02) #491 A03noOverlapWithA02<-rangedA03[!rownames(rangedA03) %in% rownames(A03peaksWithOverlapA02),]; nrow(A03noOverlapWithA02) #227 #we now have overlapping and nonoverlapping chers between A02 and A03, what we want now, is to remove those chers #that also overlap with A01 from each set A02peaksWithOverlapA01<-overlapA01.A02$Peaks2withOverlaps; nrow(A02peaksWithOverlapA01) #410 A02peaksWithOverlapA03butNotA01<-A02peaksWithOverlapA03[!rownames(A02peaksWithOverlapA03) %in% rownames(A02peaksWithOverlapA01),]; nrow(A02peaksWithOverlapA03butNotA01) #160 A03peaksWithOverlapA01<-overlapA01.A03$Peaks2withOverlaps; nrow(A03peaksWithOverlapA01) #390 A03peaksWithOverlapA02butNotA01<-A03peaksWithOverlapA02[!rownames(A03peaksWithOverlapA02) %in% rownames(A03peaksWithOverlapA01),]; nrow(A03peaksWithOverlapA02butNotA01) #162 #write RangedData to bed #https://stat.ethz.ch/pipermail/bioconductor/2012-April/045050.html write.table(as.data.frame(A02peaksWithOverlapA03butNotA01)[,c(1,2,3)], file="2013.11.21.A02overlapA03_butNotA01_HYPO.bed",sep="\t",quote=F,row.names=F,col.names=F) write.table(as.data.frame(A03peaksWithOverlapA02butNotA01)[,c(1,2,3)], file="2013.11.21.A03overlapA02_butNotA01_HYPO.bed",sep="\t",quote=F,row.names=F,col.names=F) #what does the distribution of the RANDOM negative control probes look like? rg<-readNimblegen("2013.09.24.targets.txt", "spottypes.txt", headerPattern="# software=DEVA", path="pair") #draw a loess line through the data MA<-normalizeWithinArrays(rg,method="none") MA.random<-MA[MA$genes$GENE_EXPR_OPTION == "RANDOM",]; nrow(MA.random) #23835 png(file="2013.11.20.mgaveryMAplots_RANDOM.png",width=1800, height=600) par(mfrow=c(1,3)) scatter.smooth(MA.random$A[,1], MA.random$M[,1], pch=".", col="grey",ylim=c(-6,6), main="A01") scatter.smooth(MA.random$A[,2], MA.random$M[,2], pch=".", col="grey",ylim=c(-6,6), main="A02") scatter.smooth(MA.random$A[,2], MA.random$M[,3], pch=".", col="grey",ylim=c(-6,6), main="A03") dev.off() png(file="2013.11.20.mgaveryMAplots.png",width=1800, height=600) par(mfrow=c(1,3)) plotMA(MA,array=1) plotMA(MA,array=2) plotMA(MA,array=3) dev.off() MA<-normalizeWithinArrays(RG,method="none") png(file="2013.11.20.mgaveryBoxPlots.png", width=1200, height=600) par(mfrow=c(1,2)) boxplot(MA$A, main="Sample Probes", ylab="A") boxplot(MA.random$A, main="RANDOM Control Probes", ylab="A") dev.off() png(file="2013.11.20.mgaveryMAplots_noRANDOM.png",width=1800, height=600) par(mfrow=c(1,3)) scatter.smooth(MA$A[,1], MA$M[,1], pch=".", col="grey",ylim=c(-6,6), main="A01") scatter.smooth(MA$A[,2], MA$M[,2], pch=".", col="grey",ylim=c(-6,6), main="A02") scatter.smooth(MA$A[,2], MA$M[,3], pch=".", col="grey",ylim=c(-6,6), main="A03") dev.off()