Assumptions: as with most workflows in development, a working directory is used.
#SR edits - print direct links to this notebook: the nbviewer rendering and the raw .ipynb file
#record when the notebook was run
!date
#notebook base name (no extension); substituted into both URLs below
filename='BSMAP2MK_workflow_T1D3'
#link to the rendered notebook on nbviewer
!echo 'http://nbviewer.ipython.org/github/sr320/ipython_nb/blob/master/''{filename}''.ipynb'
#link to the raw notebook file (this line previously repeated the nbviewer URL)
!echo 'https://raw.githubusercontent.com/sr320/ipython_nb/master/''{filename}''.ipynb'
#file ID - prefix of the per-run output directory name
fid="CgLarvT1D3"
#TIMESTAMP (MMDD_HHMM) - suffix of the per-run output directory name
date=!date +%m%d_%H%M
#`var=!cmd` captures the command output as a list of lines, not a string;
#keep only the first line so {date} expands to the bare timestamp rather than
#the list representation (e.g. ['0101_1200'])
date=date[0]
#working directory (parent) - the run directory is created inside it
wd="/Volumes/web/Mollusk/bs_larvae_exp/"
#where is bsmap (trailing slash required: program names are appended directly)
bsmap="/Users/Shared/Apps/bsmap-2.73/"
#fastq files location R1 location
R1="/Volumes/web/Mollusk/bs_larvae_exp/Concatenated_Files_R1/T1D3_R1.fastq"
#fastq files location R2 location
#comment out if SE
R2="/Volumes/web/Mollusk/bs_larvae_exp/Concatenated_Files_R2/T1D3_R2.fastq"
#genome file
genome="/Volumes/web/whale/ensembl/ftp.ensemblgenomes.org/pub/release-21/metazoa/fasta/crassostrea_gigas/dna/Crassostrea_gigas.GCA_000297895.1.21.dna_sm.genome.fa"
#create a run directory named <fid>_<date> under the parent working directory
#and move into it, so every output file produced below is written there
%cd {wd}
!mkdir {fid}_{date}
%cd {fid}_{date}
#run bsmap on the R1/R2 fastq files against the genome; output goes to
#bsmap_out.sam in the current (run) directory
#option - number of processes (-p)
#NOTE(review): -b {R2} is always passed, so a single-end run needs this command
#edited as well, not only the R2 assignment commented out
!{bsmap}bsmap -a {R1} -b {R2} -d {genome} -o bsmap_out.sam -p 1
#run methratio.py (from the bsmap directory) on the alignment; output goes to
#methratio_out.txt, with -s pointing at the samtools path inside the bsmap directory
#NOTE(review): -u/-z/-g presumably select unique mappings, zero-ratio loci and
#combined CpG strands - confirm with `methratio.py -h`
!python {bsmap}methratio.py -d {genome} -u -z -g -o methratio_out.txt -s {bsmap}samtools bsmap_out.sam
#keep only the lines matching the '__CG_' context pattern
#(two capital letters, then CG, then one capital letter)
!grep "[A-Z][A-Z]CG[A-Z]" < methratio_out.txt > methratio_out_CG.txt
#from those, keep the rows whose 8th column is at least 5 (5x coverage)
!awk '$8 >= 5' < methratio_out_CG.txt > methratio_out_CG5x.txt
#reformat the filtered table so it can be read by the methylKit package in R
!/Volumes/web/Mollusk/bs_larvae_exp/methratio.awk.sh methratio_out_CG5x.txt > methratio_out_CG_mkit.txt
#preview the first lines of the methylKit-ready file
!head methratio_out_CG_mkit.txt