{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/Data/Sam/scratch\n" ] } ], "source": [ "cd /Volumes/Data/Sam/scratch/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Quality trim & remove first 39bp from single FASTQ file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Code explanation:\n", "\n", "    java -jar /usr/local/bioinformatics/Trimmomatic-0.30/trimmomatic-0.30.jar\n", "The line above launches Trimmomatic. The arguments that follow it are described below in the order they appear in the command, which is also the order in which the trimming steps are executed:\n", "\n", "- single-end reads (SE)\n", " \n", "- number of threads (-threads 16)\n", " \n", "- type of quality score (-phred33)\n", "\n", "- input file location (/Volumes/nightingales/C_gigas/2212_lane2_CTTGTA_L002_R1_001.fastq.gz)\n", "\n", "- output file name/location (/Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz)\n", "\n", "- single-end Illumina TruSeq adaptor trimming (ILLUMINACLIP:/usr/local/bioinformatics/Trimmomatic-0.30/adapters/TruSeq3-SE.fa:2:30:10)\n", "\n", "- remove the first 39 bases from every read (HEADCROP:39)\n", "\n", "- remove bases from the beginning of a read while their quality is below 3 (LEADING:3)\n", "\n", "- remove bases from the end of a read while their quality is below 3 (TRAILING:3)\n", "\n", "- scan each read with a 4-base sliding window and cut the read where the average quality within the window falls below 15 (SLIDINGWINDOW:4:15)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "TrimmomaticSE: Started with arguments: -threads 16 -phred33 /Volumes/nightingales/C_gigas/2212_lane2_CTTGTA_L002_R1_001.fastq.gz /Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz ILLUMINACLIP:/usr/local/bioinformatics/Trimmomatic-0.30/adapters/TruSeq3-SE.fa:2:30:10 HEADCROP:39 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15\n", "Using Long Clipping Sequence: 
'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA'\n", "Using Long Clipping Sequence: 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'\n", "ILLUMINACLIP: Using 0 prefix pairs, 2 forward/reverse sequences, 0 forward only sequences, 0 reverse only sequences\n", "Input Reads: 16000000 Surviving: 15796545 (98.73%) Dropped: 203455 (1.27%)\n", "TrimmomaticSE: Completed successfully\n" ] } ], "source": [ "%%bash\n", "java -jar /usr/local/bioinformatics/Trimmomatic-0.30/trimmomatic-0.30.jar \\\n", "SE \\\n", "-threads 16 \\\n", "-phred33 \\\n", "/Volumes/nightingales/C_gigas/2212_lane2_CTTGTA_L002_R1_001.fastq.gz \\\n", "/Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz \\\n", "ILLUMINACLIP:/usr/local/bioinformatics/Trimmomatic-0.30/adapters/TruSeq3-SE.fa:2:30:10 \\\n", "HEADCROP:39 \\\n", "LEADING:3 \\\n", "TRAILING:3 \\\n", "SLIDINGWINDOW:4:15" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### FASTQC on trimmed file" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Analysis complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Started analysis of 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 5% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 10% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 15% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 20% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 25% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 30% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 35% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 40% complete for 
20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 45% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 50% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 55% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 60% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 65% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 70% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 75% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 80% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 85% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 90% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n", "Approx 95% complete for 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz\n" ] } ], "source": [ "%%bash\n", "fastqc /Volumes/Data/Sam/scratch/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001.fastq.gz \\\n", "--outdir=/Volumes/Eagle/Arabidopsis/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Copy files to Eagle for web-based access" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cp 20150506_* /Volumes/Eagle/Arabidopsis/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Unzip FASTQC output" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: /Volumes/Eagle/Arabidopsis/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc.zip\n", " creating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/\n", " creating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/\n", " creating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/\n", " 
inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/fastqc_icon.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/warning.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/error.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Icons/tick.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/summary.txt \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_base_quality.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_tile_quality.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_sequence_quality.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_base_sequence_content.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_sequence_gc_content.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/per_base_n_content.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/sequence_length_distribution.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/duplication_levels.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/adapter_content.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/Images/kmer_profiles.png \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/fastqc_report.html \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/fastqc_data.txt \n", " inflating: 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/fastqc.fo \n" ] } ], "source": [ "%%bash\n", "unzip /Volumes/Eagle/Arabidopsis/20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Move unzipped folder to Eagle" ] }, { "cell_type": "code", "execution_count": 
13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "mv 20150506_trimmed_2212_lane2_CTTGTA_L002_R1_001_fastqc/ /Volumes/Eagle/Arabidopsis/" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }