TACC: Setting memory limits for job 987597 to unlimited KB TACC: Dumping job script: -------------------------------------------------------------------------------- #!/bin/bash #$ -N soap_fish545_peassembly-2013-01-24-11-47-11901 #$ -cwd #$ -V #$ -o soap_fish545_peassembly-2013-01-24-11-47-11901-6352.out #$ -e soap_fish545_peassembly-2013-01-24-11-47-11901-6352.err #$ -l h_rt=01:00:00 #$ -A iPlant-Master #$ -pe 1way 24 #$ -q largemem curl -k "https://foundation.iplantcollaborative.org/apps-v1/trigger/job/6352/token/f4d9aba6-6891-4e4d-b5c0-493c280b716f/status/RUNNING" cd /scratch/0004/iplant/sr320/job-6352-soap_fish545_peassembly-2013-01-24-11-47-11901/sr320-6352-1359028044000 # Environmental settings for soapdenovo-1.05u1: module purge module load TACC module load irods #!/bin/bash #wrapper script for creating a config file or manifest for the soapdenovo assembler #more stuff from Roger Barthelson max_rd_len="75" avg_ins1="200" reverse_seq1="0" asm_flags1="3" rank1="" format1="q" reads1_1="/iplant/home//sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz" reads1_2="/iplant/home//sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz" ####################### avg_ins2="" reverse_seq2="0" asm_flags2="3" rank2="" format2="q" reads2_1="${reads2_1}" reads2_2="${reads2_2}" ####################### avg_ins3="--reverse_seqs3=0" reverse_seq3="" asm_flags3="3" rank3="" format3="q" reads3_1="${reads3_1}" reads3_2="${reads3_2}" ######################## avg_ins4="" reverse_seq4="" asm_flags4="3" rank4="" format4="q" reads4_1="${reads4_1}" reads4_2="${reads4_2}" ######################### avg_ins5="" reverse_seq5="" asm_flags5="3" rank5="" format5="q" reads5_1="${reads5_1}" reads5_2="${reads5_2}" ########################################################################### Output="SoapOutput" kmer="27" n_cpu="24" mergeLevel="-M" dkmers="" dEdges="" repeats="" gapLenDiff="50" minLen="" unmask="" options="-M""""""""50""""" iget -fT "/iplant/home//sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz" iget -fT "/iplant/home//sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz" INPUT_F1_1=$(basename /iplant/home//sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz) INPUT_F1_2=$(basename /iplant/home//sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz) echo max_rd_len="75" > config_file.txt echo "[LIB]" >> config_file.txt echo avg_ins="200" >> config_file.txt echo reverse_seq="0" >> config_file.txt echo asm_flags="3" >> config_file.txt echo rank="" >> config_file.txt echo "q"1"="${INPUT_F1_1} >> config_file.txt echo "q"2"="${INPUT_F1_2} >> config_file.txt if [ "avg_ins2" != '' ] ; then iget -fT "${reads2_1}" iget -fT "${reads2_2}" INPUT_F2_1=$(basename ${reads2_1}) INPUT_F2_2=$(basename ${reads2_2}) echo "[LIB]" >> config_file.txt echo avg_ins="" >> config_file.txt echo reverse_seq="0" >> config_file.txt echo asm_flags="3" >> config_file.txt echo rank="" >> config_file.txt echo "q"1"="${INPUT_F2_1} >> config_file.txt echo "q"2"="${INPUT_F2_2} >> config_file.txt fi if [ "--reverse_seqs3=0" != '' ] ; then iget -fT "${reads3_1}" iget -fT "${reads3_2}" INPUT_F3_1=$(basename ${reads3_1}) INPUT_F3_2=$(basename ${reads3_2}) echo "[LIB]" >> config_file.txt echo avg_ins="--reverse_seqs3=0" >> config_file.txt echo reverse_seq="" >> config_file.txt echo asm_flags="3" >> config_file.txt echo rank="" >> config_file.txt echo "q"1"="${INPUT_F3_1} >> config_file.txt echo "q"2"="${INPUT_F3_2} >> config_file.txt fi if [ "" != '' ] ; then iget -fT "${reads4_1}" iget -fT "${reads4_2}" INPUT_F4_1=$(basename ${reads4_1}) INPUT_F4_2=$(basename ${reads4_2}) echo "[LIB]" >> config_file.txt echo avg_ins="" >> config_file.txt echo reverse_seq="" >> config_file.txt echo asm_flags="3" >> config_file.txt echo rank="" >> config_file.txt echo "q"1"="${INPUT_F4_1} >> config_file.txt echo "q"2"="${INPUT_F4_2} >> config_file.txt fi if [ "${reads5_1}" != '' ] ; then iget -fT "${reads5_1}" INPUT_F5_1=$(basename ${reads5_1}) if [ "${reads5_2}" != '' ] ; then iget -fT "${reads5_2}" INPUT_F5_2=$(basename ${reads5_2}) echo "[LIB]" >> config_file.txt echo avg_ins="" >> config_file.txt echo reverse_seq="" >> config_file.txt echo asm_flags="3" >> config_file.txt echo rank="" >> config_file.txt echo "q"1"="${INPUT_F5_1} >> config_file.txt echo "q"2"="${INPUT_F5_2} >> config_file.txt else echo "[LIB]" >> config_file.txt echo asm_flags=1 >> config_file.txt echo rank="" >> config_file.txt echo "q""="${INPUT_F5_1} >> config_file.txt fi fi # SOAPdenovo all -s configFile [-K kmer -d -D -M mergeLevel -R -u -G gapLenDiff -L minContigLen -p n_cpu] -o Output # -s ShortSeqFile: The input file name of solexa reads # -a initKmerSetSize: define the initial KmerSet size(unit: GB) # -K kmer(default 23): k value in kmer # -p n_cpu(default 8): number of cpu for use # -F (optional) fill gaps in scaffold # -M mergeLevel(default 1,min 0, max 3): the strength of merging similar sequences during contiging # -d (optional): delete kmers with frequency one (default no) # -D (optional): delete edges with coverage one (default no) # -R (optional): unsolve repeats by reads (default no) # -G gapLenDiff(default 50): allowed length difference between estimated and filled gap # -L minLen(default K+2): shortest contig for scaffolding # -u (optional): un-mask contigs with high coverage before scaffolding (default mask) # -o Output: prefix of output file name module load soap SOAPdenovo-63mer all -s config_file.txt -K "27" -p "24" -o "SoapOutput" "${options}" curl -k "https://foundation.iplantcollaborative.org/apps-v1/trigger/job/6352/token/f4d9aba6-6891-4e4d-b5c0-493c280b716f/status/FINISHED" curl -k "https://foundation.iplantcollaborative.org/apps-v1/trigger/job/6352/token/f4d9aba6-6891-4e4d-b5c0-493c280b716f/status/ARCHIVING" imkdir -p /iplant/home/sr320/analyses/SOAP_fish545_PEassembly-2013-01-24-11-47-11.901 for i in `find . -maxdepth 1`; do exists=`grep -x "$i" .iplant.archive` if [ ! -n "$exists" ]; then iput -T -v -f -r $i /iplant/home/sr320/analyses/SOAP_fish545_PEassembly-2013-01-24-11-47-11.901 fi done curl -k "https://foundation.iplantcollaborative.org/apps-v1/trigger/job/6352/token/f4d9aba6-6891-4e4d-b5c0-493c280b716f/status/ARCHIVING_FINISHED" -------------------------------------------------------------------------------- TACC: Done. {"status":"success","message":"","result":{"id":6352,"name":"SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","owner":"sr320","system":"lonestar4.tacc.teragrid.org","software":"soapdenovo-1.05u1","processors":1,"requestedTime":"01:00:00","memory":1000,"callbackUrl":null,"archive":true,"archivePath":"/sr320/analyses/SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","outputPath":null,"outputUrl":"https://foundation.iplantcollaborative.org/apps-v1/job/6352/output/list","status":"RUNNING","submitTime":1359028060000,"startTime":1359028149690,"endTime":null,"inputs":[{"reads1_1":"/sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz"},{"reads1_2":"/sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz"}],"parameters":[{"dEdges":""},{"format5":"q"},{"unmask":""},{"format4":"q"},{"Output":"SoapOutput"},{"format3":"q"},{"repeats":""},{"format2":"q"},{"format1":"q"},{"avg_ins2":""},{"avg_ins1":"200"},{"n_cpu":"24"},{"avg_ins4":""},{"gapLenDiff":"50"},{"avg_ins3":"--reverse_seqs3=0"},{"avg_ins5":""},{"kmer":"27"},{"asm_flags1":"3"},{"asm_flags2":"3"},{"reverse_seq1":"0"},{"minLen":""},{"rank3":""},{"reverse_seq4":""},{"rank2":""},{"reverse_seq5":""},{"reverse_seq2":"0"},{"max_rd_len":"75"},{"rank1":""},{"reverse_seq3":""},{"mergeLevel":"-M"},{"rank5":""},{"dkmers":""},{"rank4":""},{"asm_flags4":"3"},{"asm_flags3":"3"},{"asm_flags5":"3"}],"permissions":[]}} Version 1.05: released on July 29th, 2010 pregraph -s config_file.txt -K 27 -p 24 -o SoapOutput In config_file.txt, 3 libs, max seq len 75, max name len 256 24 thread created read from file: filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz read from file: filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz time spent on hash reads: 864s, 79646478 reads processed [LIB] 0, avg_ins 0, reverse 0 [LIB] 1, avg_ins 0, reverse 0 [LIB] 2, avg_ins 200, reverse 0 392551809 nodes allocated, 3902677422 kmer in reads, 3902677422 kmer processed 362985046 linear nodes time spent on marking linear nodes 21s time spent on pre-graph construction: 885s deLowKmer 0, deLowEdge 0 Start to remove tips of single frequency kmers short than 54 11601269 tips off 24 thread created 5931823 linear nodes Start to remove tips which don't contribute the most links kmer set 0 done kmer set 1 done kmer set 2 done kmer set 3 done kmer set 4 done kmer set 5 done kmer set 6 done kmer set 7 done kmer set 8 done kmer set 9 done kmer set 10 done kmer set 11 done kmer set 12 done kmer set 13 done kmer set 14 done kmer set 15 done kmer set 16 done kmer set 17 done kmer set 18 done kmer set 19 done kmer set 20 done kmer set 21 done kmer set 22 done kmer set 23 done 1102635 tips off 24 thread created 0 linear nodes time spent on cutTipe: 217s --- 20000000 edges built 31395460 (15699996) edges 4012474 extra nodes time spent on making edges: 165s In file: config_file.txt, max seq len 75, max name len 256 24 thread created read from file: filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz read from file: filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz 79646478 reads processed time 103,26,122,40,62,59,0 0 markers outputed done mapping reads, 2374015 reads deleted, 28870530 arcs created [LIB] 0, avg_ins 0, reverse 0 [LIB] 1, avg_ins 0, reverse 0 [LIB] 2, avg_ins 200, reverse 0 time spent on mapping reads: 432s 10037786 vertex outputed overall time for lightgraph: 28m contig -g SoapOutput -M 50 there're 10037786 kmers in vertex file there're 31395460 edge in edge file done sort input 31395460 edges 41785598 pre-arcs loaded start to pinch bubbles, cutoff 0.100000, MAX NODE NUM 30, MAX DIFF 10 .............100000 .............200000 .............300000 .............400000 .............500000 .............600000 .............700000 .............800000 .............900000 .............1000000 .............1100000 .............1200000 .............1300000 .............1400000 .............1500000 .............1600000 .............1700000 .............1800000 .............1900000 .............2000000 .............2100000 .............2200000 .............2300000 484817 startingPoints, 22410744 dheap nodes 6591571 pairs found, 4229441 pairs of paths compared, 2382458 pairs merged sequenc compare failure: 0 1653427 8738 184818 DFibHeap: 1515427 Nodes allocated a linear concatenation lap, 3615167 concatenated a linear concatenation lap, 272541 concatenated a linear concatenation lap, 5522 concatenated a linear concatenation lap, 0 concatenated 16334624 edges in graph time spent on bubblePinch: 214s 1867436 weak inner edges destroyed 3676684 dead arcs removed Remove low coverage(1): 363077 inner edges destroyed 780475 dead arcs removed a linear concatenation lap, 1665794 concatenated a linear concatenation lap, 53169 concatenated a linear concatenation lap, 68 concatenated a linear concatenation lap, 0 concatenated 8439118 edges in graph there're 31395460 edges after compacting 8458092 edges left strict 0, cutLen 54 a cutTipsInGraph lap, 1774709 tips cut a cutTipsInGraph lap, 239103 tips cut a cutTipsInGraph lap, 99576 tips cut a cutTipsInGraph lap, 55339 tips cut a cutTipsInGraph lap, 42039 tips cut a cutTipsInGraph lap, 37785 tips cut a cutTipsInGraph lap, 37656 tips cut a cutTipsInGraph lap, 38856 tips cut a cutTipsInGraph lap, 36450 tips cut a cutTipsInGraph lap, 28946 tips cut a cutTipsInGraph lap, 21127 tips cut a cutTipsInGraph lap, 14054 tips cut a cutTipsInGraph lap, 8787 tips cut a cutTipsInGraph lap, 4815 tips cut a cutTipsInGraph lap, 2374 tips cut a cutTipsInGraph lap, 1142 tips cut a cutTipsInGraph lap, 583 tips cut a cutTipsInGraph lap, 445 tips cut a cutTipsInGraph lap, 327 tips cut a cutTipsInGraph lap, 226 tips cut a cutTipsInGraph lap, 111 tips cut a cutTipsInGraph lap, 59 tips cut a cutTipsInGraph lap, 33 tips cut a cutTipsInGraph lap, 16 tips cut a cutTipsInGraph lap, 18 tips cut a cutTipsInGraph lap, 14 tips cut a cutTipsInGraph lap, 7 tips cut a cutTipsInGraph lap, 7 tips cut a cutTipsInGraph lap, 3 tips cut a cutTipsInGraph lap, 1 tips cut a cutTipsInGraph lap, 1 tips cut a cutTipsInGraph lap, 1 tips cut a cutTipsInGraph lap, 3 tips cut a cutTipsInGraph lap, 2 tips cut a cutTipsInGraph lap, 0 tips cut 790814 dead arcs removed a linear concatenation lap, 420836 concatenated a linear concatenation lap, 15342 concatenated a linear concatenation lap, 44 concatenated a linear concatenation lap, 0 concatenated 2680286 edges in graph there're 8458092 edges after compacting 2696418 edges left 231049 ctgs longer than 100, sum up 65547502bp, with average length 283 the longest is 10274bp, contig N50 is 377 bp,contig N90 is 124 bp 1344675 contigs longer than 28 output time elapsed: 6m map -s config_file.txt -g SoapOutput -p 24 K = 27 contig len cutoff: 29 there're 1344675 contigs in file: SoapOutput, max seq len 10274, min seq len 28, max name len 10 time spent on parse contigs file 1s 24 thread created time spent on hash reads: 23s 76111652 nodes allocated, 77241749 kmer in reads, 77241749 kmer processed time spent on De bruijn graph construction: 24s time spent on mapping long reads: 0s In file: config_file.txt, max seq len 75, max name len 256 24 thread created 2696418 edges in graph read from file: filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz read from file: filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz current insert size 200, map_len 0 Output 28576560 out of 79646478 (35.9)% reads in gaps 56423503 out of 79646478 (70.8)% reads mapped to contigs 1 pe insert size, the largest boundary is 79646478 [LIB] 0, avg_ins 0, reverse 0 [LIB] 1, avg_ins 0, reverse 0 [LIB] 2, avg_ins 200, reverse 0 time spent on mapping reads: 306s overall time for alignment: 5m scaff -g SoapOutput -p 24 there're 1 grads, 79646478 reads, max read len 75 K = 27 there're 2696418 edge in edge file average contig coverage is 18, 593216 contig masked Mask contigs shorter than 29, 1438864 contig masked 3562964 arcs loaded input 1344675 contigs done loading updated edges time spent on loading edges 12s 12387378 PEs with insert size 200 attached, 80133 + 938973 + 0 ignored estimated PE size 162, by 11546002 pairs on contigs longer than 200, 9536514 pairs found,SD=52, insert_size estimated: 165 1434317 new connections all PEs attached time spent on loading pair end info 58s 1130190 link to masked contigs, 0 links on a single scaff Insert size 200: 1434316 links input Cutoff for number of pairs to make a reliable connection: 3 353948 weak connects removed (there were 608252 active cnnects)) 48 circles removed variance for insert size 20 a remove transitive lag, 1248 connections removed a remove transitive lag, 0 connections removed Picked 34648 subgraphs,83 have conflicting connections,31381 have significant overlapping, 21 eligible maskRepeat: 17209 contigs masked from 19979 puzzles a remove transitive lag, 204 connections removed a remove transitive lag, 0 connections removed Picked 2061 subgraphs,12 have conflicting connections,1746 have significant overlapping, 1 eligible non-strict linearization Picked 1692 subgraphs,52 have conflicting connections,1091 have significant overlapping, 1 eligible Masked 740 contigs, 0 puzzle left Freezing is done.... 61 contigs recovered all links loaded time spent on creating scaffolds 32s the final rank 37163 scaffolds from 231026 contigs sum up 32661237bp, with average length 878, 0 gaps filled 189264 scaffolds&singleton sum up 64688676bp, with average length 341 the longest is 18150bp,scaffold N50 is 636 bp, scaffold N90 is 120 bp Found 0 weak points in scaffolds Processed 1000 scaffolds Processed 2000 scaffolds Processed 3000 scaffolds Processed 4000 scaffolds Processed 5000 scaffolds Processed 6000 scaffolds Processed 7000 scaffolds Processed 8000 scaffolds Processed 9000 scaffolds Processed 10000 scaffolds Processed 11000 scaffolds Processed 12000 scaffolds Processed 13000 scaffolds Processed 14000 scaffolds Processed 15000 scaffolds Processed 16000 scaffolds Processed 17000 scaffolds Processed 18000 scaffolds Processed 19000 scaffolds Processed 20000 scaffolds Processed 21000 scaffolds Processed 22000 scaffolds Processed 23000 scaffolds Processed 24000 scaffolds Processed 25000 scaffolds Processed 26000 scaffolds Processed 27000 scaffolds Processed 28000 scaffolds Processed 29000 scaffolds Processed 30000 scaffolds Processed 31000 scaffolds Processed 32000 scaffolds Processed 33000 scaffolds Processed 34000 scaffolds Processed 35000 scaffolds Processed 36000 scaffolds Processed 37000 scaffolds Done with 37163 scaffolds, 0 gaps finished, 68524 gaps overall time elapsed: 1m time for the whole pipeline: 42m {"status":"success","message":"","result":{"id":6352,"name":"SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","owner":"sr320","system":"lonestar4.tacc.teragrid.org","software":"soapdenovo-1.05u1","processors":1,"requestedTime":"01:00:00","memory":1000,"callbackUrl":null,"archive":true,"archivePath":"/sr320/analyses/SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","outputPath":null,"outputUrl":"https://foundation.iplantcollaborative.org/io-v1/io/list//sr320/analyses/SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","status":"FINISHED","submitTime":1359028060000,"startTime":1359028149000,"endTime":1359030945821,"inputs":[{"reads1_1":"/sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz"},{"reads1_2":"/sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz"}],"parameters":[{"dEdges":""},{"format5":"q"},{"unmask":""},{"format4":"q"},{"Output":"SoapOutput"},{"format3":"q"},{"repeats":""},{"format2":"q"},{"format1":"q"},{"avg_ins2":""},{"avg_ins1":"200"},{"n_cpu":"24"},{"avg_ins4":""},{"gapLenDiff":"50"},{"avg_ins3":"--reverse_seqs3=0"},{"avg_ins5":""},{"kmer":"27"},{"asm_flags1":"3"},{"asm_flags2":"3"},{"reverse_seq1":"0"},{"minLen":""},{"rank3":""},{"reverse_seq4":""},{"rank2":""},{"reverse_seq5":""},{"reverse_seq2":"0"},{"max_rd_len":"75"},{"rank1":""},{"reverse_seq3":""},{"mergeLevel":"-M"},{"rank5":""},{"dkmers":""},{"rank4":""},{"asm_flags4":"3"},{"asm_flags3":"3"},{"asm_flags5":"3"}],"permissions":[]}}{"status":"success","message":"","result":{"id":6352,"name":"SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","owner":"sr320","system":"lonestar4.tacc.teragrid.org","software":"soapdenovo-1.05u1","processors":1,"requestedTime":"01:00:00","memory":1000,"callbackUrl":null,"archive":true,"archivePath":"/sr320/analyses/SOAP_fish545_PEassembly-2013-01-24-11-47-11.901","outputPath":null,"outputUrl":"https://foundation.iplantcollaborative.org/apps-v1/job/6352/output/list","status":"ARCHIVING","submitTime":1359028060000,"startTime":1359028149000,"endTime":1359030945821,"inputs":[{"reads1_1":"/sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R1.fastq.gz"},{"reads1_2":"/sr320/fish546/filtered_106A_Female_Mix_GATCAG_L004_R2.fastq.gz"}],"parameters":[{"dEdges":""},{"format5":"q"},{"unmask":""},{"format4":"q"},{"Output":"SoapOutput"},{"format3":"q"},{"repeats":""},{"format2":"q"},{"format1":"q"},{"avg_ins2":""},{"avg_ins1":"200"},{"n_cpu":"24"},{"avg_ins4":""},{"gapLenDiff":"50"},{"avg_ins3":"--reverse_seqs3=0"},{"avg_ins5":""},{"kmer":"27"},{"asm_flags1":"3"},{"asm_flags2":"3"},{"reverse_seq1":"0"},{"minLen":""},{"rank3":""},{"reverse_seq4":""},{"rank2":""},{"reverse_seq5":""},{"reverse_seq2":"0"},{"max_rd_len":"75"},{"rank1":""},{"reverse_seq3":""},{"mergeLevel":"-M"},{"rank5":""},{"dkmers":""},{"rank4":""},{"asm_flags4":"3"},{"asm_flags3":"3"},{"asm_flags5":"3"}],"permissions":[]}}