{ "metadata": { "name": "", "signature": "sha256:8826f0dd553dcc156a011e7a7b647823930ef7e9838ad17654652a62d0d947c5" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Looking at Query" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# head is the command to look at the first lines of a file (-2 shows the first two)\n", "!head -2 /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">Ostrea_lur_contig1\r\n", "GATTTGACAGCCTCTATTTCTTGATTGGATTTGACAGCCTCTATTTCTTGGACCTGAAGTAGTCATTGAGAACATCCAGGGCTTGAGACTCCTTTCCGTAATCCTTGACTACCACACAGCTGCAGCCAACAACTTTCCTGGCTTTGCCCTCTTTGTCAATCTTACAAAGACCAGACCACTCTCCCAGTTTTTTGTTGTCATCAACCTTCAAAAGGTTGATCCCATGTTCAGCACAGAGTGCTTCCACCAGCTTTACATACATGGGTTCATCGCAGTTGTTGGCAAGAATGCACAAATGAGCTTGACGTTTGTCAAGGGCCTTTGCACACTCATGCAGTCCTTTTGCTAAACCATCGTGAATCATGGCCGTCTTCAGAACTTCCTGGACCGCTGTAAATACGTCCATCGATCCACCCGATACAGTGGGAACATCATCTCCTTCCGCATCCGACATTTTCGCGAAAACGGACGTCCTTAATGATGTTCCAAGAGAAGTT\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "_Ostrea lurida_ is an **oyster**."
] }, { "cell_type": "code", "collapsed": false, "input": [ "# wc is word count; to get only the number of lines, use -l\n", "!wc -l /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 82272 /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Below is the number of sequences (contigs) in the transcriptome" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep -c \">\" /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "41136\r\n" ] } ], "prompt_number": 46 }, { "cell_type": "markdown", "metadata": {}, "source": [ "I ran this in the terminal:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "/Applications/ncbi-blast-2.2.29+/bin/blastx \\\n", "-query /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt \\\n", "-db /Volumes/web/whale/blast/db/uniprot_sprot \\\n", "-outfmt 6 \\\n", "-out /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot.tab\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This was the output..."
] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1\tsp|P84175|RS12_CHICK\t82.40\t125\t22\t0\t418\t44\t8\t132\t7e-71\t 216\r\n", "Ostrea_lur_contig1\tsp|P46405|RS12_PIG\t82.40\t125\t22\t0\t418\t44\t8\t132\t8e-71\t 216\r\n", "Ostrea_lur_contig1\tsp|P25398|RS12_HUMAN\t82.40\t125\t22\t0\t418\t44\t8\t132\t8e-71\t 216\r\n", "Ostrea_lur_contig1\tsp|Q76I81|RS12_BOVIN\t82.40\t125\t22\t0\t418\t44\t8\t132\t8e-71\t 216\r\n", "Ostrea_lur_contig1\tsp|O13019|RS12_ORENI\t81.10\t127\t24\t0\t424\t44\t6\t132\t3e-70\t 215\r\n", "Ostrea_lur_contig1\tsp|P47840|RS12_XENLA\t81.60\t125\t23\t0\t418\t44\t8\t132\t1e-69\t 214\r\n", "Ostrea_lur_contig1\tsp|P63324|RS12_RAT\t81.60\t125\t23\t0\t418\t44\t8\t132\t2e-69\t 213\r\n", "Ostrea_lur_contig1\tsp|P63323|RS12_MOUSE\t81.60\t125\t23\t0\t418\t44\t8\t132\t2e-69\t 213\r\n", "Ostrea_lur_contig1\tsp|Q9SMI3|RS12_CYAPA\t65.15\t132\t43\t1\t442\t47\t8\t136\t1e-58\t 186\r\n", "Ostrea_lur_contig1\tsp|P80455|RS12_DROME\t64.12\t131\t44\t1\t433\t50\t7\t137\t9e-52\t 168\r\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig2626\tsp|Q99MZ8|LASP1_RAT\t34.29\t35\t23\t0\t455\t559\t5\t39\t4.8\t33.5\r\n", "Ostrea_lur_contig2626\tsp|Q14847|LASP1_HUMAN\t34.29\t35\t23\t0\t455\t559\t5\t39\t5.2\t33.1\r\n", "Ostrea_lur_contig2626\tsp|Q5R5W0|LASP1_PONAB\t34.29\t35\t23\t0\t455\t559\t5\t39\t5.5\t33.1\r\n", "Ostrea_lur_contig2626\tsp|O77506|LASP1_RABIT\t34.29\t35\t23\t0\t455\t559\t5\t39\t5.5\t33.1\r\n", "Ostrea_lur_contig2626\tsp|Q8BGB5|LIMD2_MOUSE\t30.77\t65\t44\t1\t449\t643\t39\t102\t7.2\t31.6\r\n", 
"Ostrea_lur_contig2626\tsp|Q86I44|LIMF_DICDI\t24.07\t108\t65\t5\t473\t775\t87\t184\t7.3\t32.3\r\n", "Ostrea_lur_contig2626\tsp|O43900|PRIC3_HUMAN\t26.60\t94\t64\t4\t515\t796\t211\t299\t7.4\t33.1\r\n", "Ostrea_lur_contig2626\tsp|Q05158|CSRP2_COTJA\t24.72\t89\t58\t2\t386\t628\t89\t176\t8.3\t32.3\r\n", "Ostrea_lur_contig2626\tsp|Q80XB4|NRAP_MOUSE\t28.21\t39\t28\t0\t443\t559\t2\t40\t8.5\t33.1\r\n", "Ostrea_lur_contig2626\tsp|P50460|CSRP2_CHICK\t25.61\t82\t52\t2\t407\t628\t96\t176\t9.1\t32.3\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Not sure but blast likely stopped early**" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Blasting again" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!/Applications/ncbi-blast-2.2.29+/bin/blastx \\\n", "-query /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt \\\n", "-db /Volumes/web/whale/blast/db/uniprot_sprot \\\n", "-max_target_seqs 1 \\\n", "-max_hsps 1 \\\n", "-outfmt 6 \\\n", "-num_threads 2 \\\n", "-out /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 690 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 59 replaced by X\r\n", "Selenocysteine (U) at position 297 replaced by X\r\n", "Selenocysteine (U) at position 307 replaced by X\r\n", "Selenocysteine (U) at position 338 replaced by X\r\n", "Selenocysteine (U) at position 350 replaced by X\r\n", "Selenocysteine (U) at position 363 replaced by X\r\n", "Selenocysteine (U) at position 365 replaced by X\r\n", "Selenocysteine (U) at position 372 replaced by X\r\n", "Selenocysteine (U) at position 388 replaced by X\r\n", "Selenocysteine (U) at position 390 replaced by X\r\n", "Selenocysteine (U) at position 397 replaced by X\r\n", "Selenocysteine (U) at position 
399 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 436 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 52 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 49 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 52 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 46 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 64 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at 
position 95 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 93 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 273 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 38 replaced by X\r\n", "Selenocysteine (U) at position 38 replaced by X\r\n", "Selenocysteine (U) at position 48 replaced by X\r\n", "Selenocysteine (U) at position 48 replaced by X\r\n", "Selenocysteine (U) at position 41 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n", "Selenocysteine (U) at position 13 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 192 replaced by X\r\n", "Selenocysteine (U) at position 187 replaced by X\r\n", "Selenocysteine (U) at position 189 replaced by X\r\n", "Selenocysteine (U) at position 189 replaced by X\r\n", "Selenocysteine (U) at position 188 replaced by X\r\n", "Selenocysteine (U) at position 189 replaced by X\r\n", "Selenocysteine (U) at position 189 replaced by X\r\n", "Selenocysteine (U) at position 189 replaced by X\r\n", 
"Selenocysteine (U) at position 188 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 43 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 129 replaced by X\r\n", "Selenocysteine (U) at position 129 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 430 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 637 replaced by X\r\n", "Selenocysteine (U) at position 690 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 59 replaced by X\r\n", "Selenocysteine (U) at position 267 replaced by X\r\n", "Selenocysteine (U) at position 273 replaced by X\r\n", "Selenocysteine (U) at position 279 replaced by X\r\n", "Selenocysteine (U) at position 290 replaced by X\r\n", "Selenocysteine (U) at position 292 replaced by X\r\n", "Selenocysteine (U) at position 294 replaced by X\r\n", "Selenocysteine (U) at position 310 replaced by X\r\n", "Selenocysteine (U) at position 320 replaced by X\r\n", "Selenocysteine (U) at position 322 replaced by X\r\n", "Selenocysteine (U) at position 336 replaced by X\r\n", "Selenocysteine (U) at position 338 replaced by X\r\n", "Selenocysteine (U) at position 346 replaced by X\r\n", "Selenocysteine (U) at position 353 replaced by X\r\n", "Selenocysteine (U) at position 
355 replaced by X\r\n", "Selenocysteine (U) at position 362 replaced by X\r\n", "Selenocysteine (U) at position 364 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 493 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 637 replaced by X\r\n", "Selenocysteine (U) at position 690 replaced by X\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1\tsp|P84175|RS12_CHICK\t82.40\t125\t22\t0\t418\t44\t8\t132\t7e-71\t 216\r\n", "Ostrea_lur_contig2\tsp|P62248|CS010_BOVIN\t40.27\t149\t78\t4\t25\t444\t28\t174\t3e-24\t 102\r\n", "Ostrea_lur_contig3\tsp|O61231|RL10_DROME\t82.24\t214\t38\t0\t643\t2\t1\t214\t4e-129\t 370\r\n", "Ostrea_lur_contig4\tsp|O76082|S22A5_HUMAN\t31.87\t91\t62\t0\t465\t193\t433\t523\t4e-11\t64.3\r\n", "Ostrea_lur_contig5\tsp|Q2KJG1|SPAT6_BOVIN\t56.10\t41\t18\t0\t14\t136\t437\t477\t1e-08\t58.5\r\n", "Ostrea_lur_contig6\tsp|P84082|ARF2_RAT\t92.62\t149\t11\t0\t85\t531\t1\t149\t8e-98\t 287\r\n", "Ostrea_lur_contig7\tsp|A6H782|TEKT3_BOVIN\t51.65\t395\t191\t0\t386\t1570\t87\t481\t8e-135\t 416\r\n", "Ostrea_lur_contig8\tsp|Q04432|HSP31_YEAST\t30.34\t234\t134\t8\t728\t84\t5\t228\t1e-24\t 102\r\n", "Ostrea_lur_contig9\tsp|P25114|F264_RAT\t63.11\t366\t133\t2\t1334\t240\t104\t468\t2e-170\t 494\r\n", "Ostrea_lur_contig10\tsp|P62909|RS3_RAT\t87.13\t101\t13\t0\t355\t53\t137\t237\t1e-56\t 182\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ 
"Ostrea_lur_contig1819\tsp|Q0UTQ5|UTP25_PHANO\t34.85\t66\t35\t1\t655\t482\t97\t162\t0.93\t34.7\r\n", "Ostrea_lur_contig1820\tsp|Q8C9M2|CCD15_MOUSE\t42.68\t157\t85\t1\t699\t229\t633\t784\t4e-20\t98.2\r\n", "Ostrea_lur_contig1821\tsp|Q9W0Y6|PAIN_DROME\t40.54\t37\t20\t1\t247\t143\t482\t518\t5.3\t28.9\r\n", "Ostrea_lur_contig1822\tsp|A2BID5|MELT_DANRE\t25.40\t63\t47\t0\t12\t200\t90\t152\t0.40\t32.3\r\n", "Ostrea_lur_contig1823\tsp|Q28740|BASI_RABIT\t35.57\t194\t107\t7\t697\t1245\t82\t268\t1e-21\t 100\r\n", "Ostrea_lur_contig1824\tsp|Q9BY49|PECR_HUMAN\t55.97\t268\t115\t2\t52\t849\t15\t281\t6e-102\t 310\r\n", "Ostrea_lur_contig1825\tsp|P50141|GCH1_CHICK\t89.09\t110\t12\t0\t503\t174\t126\t235\t7e-67\t 210\r\n", "Ostrea_lur_contig1826\tsp|Q28619|NHRF1_RABIT\t56.44\t101\t42\t1\t130\t432\t12\t110\t7e-28\t 118\r\n", "Ostrea_lur_contig1827\tsp|Q6EE31|TCPQ_CHICK\t66.73\t523\t173\t1\t3\t1568\t23\t545\t0.0\t 743\r\n", "Ostrea_lur_contig1828\tsp|O62703|CTBL1_BOVIN\t65.63\t323\t110\t1\t1126\t161\t241\t563\t8e-130\t 390\r\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc -l /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 1503 /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "It appears this blast didn't finish for an unknown reason. Will try again." 
] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Use SQLShare to get gene ID info" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Usage: sed 's/abc/XYZ/g' < infile > outfile\n", "# Replace every '|' in the blastx table with a space and save the result as *_2c.tab.\n", "# NOTE(review): the redirections were missing; input restored as olur_blastx_uniprot_2.tab\n", "# (its rows match the head output of *_2c.tab in the next cell) - confirm.\n", "!sed 's/|/ /g' < /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab > /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2c.tab" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2c.tab\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1\tsp P84175 RS12_CHICK\t82.40\t125\t22\t0\t418\t44\t8\t132\t7e-71\t 216\r\n", "Ostrea_lur_contig2\tsp P62248 CS010_BOVIN\t40.27\t149\t78\t4\t25\t444\t28\t174\t3e-24\t 102\r\n", "Ostrea_lur_contig3\tsp O61231 RL10_DROME\t82.24\t214\t38\t0\t643\t2\t1\t214\t4e-129\t 370\r\n", "Ostrea_lur_contig4\tsp O76082 S22A5_HUMAN\t31.87\t91\t62\t0\t465\t193\t433\t523\t4e-11\t64.3\r\n", "Ostrea_lur_contig5\tsp Q2KJG1 SPAT6_BOVIN\t56.10\t41\t18\t0\t14\t136\t437\t477\t1e-08\t58.5\r\n", "Ostrea_lur_contig6\tsp P84082 ARF2_RAT\t92.62\t149\t11\t0\t85\t531\t1\t149\t8e-98\t 287\r\n", "Ostrea_lur_contig7\tsp A6H782 TEKT3_BOVIN\t51.65\t395\t191\t0\t386\t1570\t87\t481\t8e-135\t 416\r\n", "Ostrea_lur_contig8\tsp Q04432 HSP31_YEAST\t30.34\t234\t134\t8\t728\t84\t5\t228\t1e-24\t 102\r\n", "Ostrea_lur_contig9\tsp P25114 F264_RAT\t63.11\t366\t133\t2\t1334\t240\t104\t468\t2e-170\t 494\r\n", "Ostrea_lur_contig10\tsp P62909 RS3_RAT\t87.13\t101\t13\t0\t355\t53\t137\t237\t1e-56\t 182\r\n" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "# tr reads only standard input (it takes no file operand), so the input must be redirected.\n", "# Turn each '|' into a tab so sp|ACCESSION|NAME becomes three separate columns in *_2d.tab.\n", "# NOTE(review): input restored as olur_blastx_uniprot_2.tab (redirections were missing) - confirm.\n", "!tr '|' \"\\t\" < /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab > /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2d.tab" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2d.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type":
"stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1\tsp\tP84175\tRS12_CHICK\t82.40\t125\t22\t0\t418\t44\t8\t132\t7e-71\t 216\r\n", "Ostrea_lur_contig2\tsp\tP62248\tCS010_BOVIN\t40.27\t149\t78\t4\t25\t444\t28\t174\t3e-24\t 102\r\n", "Ostrea_lur_contig3\tsp\tO61231\tRL10_DROME\t82.24\t214\t38\t0\t643\t2\t1\t214\t4e-129\t 370\r\n", "Ostrea_lur_contig4\tsp\tO76082\tS22A5_HUMAN\t31.87\t91\t62\t0\t465\t193\t433\t523\t4e-11\t64.3\r\n", "Ostrea_lur_contig5\tsp\tQ2KJG1\tSPAT6_BOVIN\t56.10\t41\t18\t0\t14\t136\t437\t477\t1e-08\t58.5\r\n", "Ostrea_lur_contig6\tsp\tP84082\tARF2_RAT\t92.62\t149\t11\t0\t85\t531\t1\t149\t8e-98\t 287\r\n", "Ostrea_lur_contig7\tsp\tA6H782\tTEKT3_BOVIN\t51.65\t395\t191\t0\t386\t1570\t87\t481\t8e-135\t 416\r\n", "Ostrea_lur_contig8\tsp\tQ04432\tHSP31_YEAST\t30.34\t234\t134\t8\t728\t84\t5\t228\t1e-24\t 102\r\n", "Ostrea_lur_contig9\tsp\tP25114\tF264_RAT\t63.11\t366\t133\t2\t1334\t240\t104\t468\t2e-170\t 494\r\n", "Ostrea_lur_contig10\tsp\tP62909\tRS3_RAT\t87.13\t101\t13\t0\t355\t53\t137\t237\t1e-56\t 182\r\n" ] } ], "prompt_number": 23 }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Code to get Protein Name from SQLShare**\n", "```\n", "SELECT * FROM [graceac9@washington.edu].[olur_blastx_uniprot_2d.tab]olur\n", " left join \n", " [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp\n", " on\n", " olur.Column3=sp.SPID\n", "```" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/scaphapoda/Grace/Olur_SPID_description_a.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Column1,Column2,Column3,Column4,Column5,Column6,Column7,Column8,Column9,Column10,Column11,Column12,Column13,Column14,SPID,Status,ProteinName,GeneName,Organism,Length\r", "\r\n", "Ostrea_lur_contig1,sp,P84175,RS12_CHICK,82.4,125,22,0,418,44,8,132,7E-71,216,P84175,reviewed,40S ribosomal protein S12,RPS12,Gallus gallus (Chicken),132\r", "\r\n", 
"Ostrea_lur_contig10,sp,P62909,RS3_RAT,87.13,101,13,0,355,53,137,237,1E-56,182,P62909,reviewed,40S ribosomal protein S3,Rps3,Rattus norvegicus (Rat),243\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,reviewed,\"\"\"Splicing factor, suppressor of white-apricot homolog (Splicing factor, arginine/serine-rich 8) (Suppressor of white apricot protein homolog)\"\"\",Sfswap Sfrs8 Srsf8 Swap,Mus musculus (Mouse),945\r", "\r\n", "Ostrea_lur_contig1001,sp,B8E0N8,ARGC_DICTD,48.39,31,16,0,394,486,10,40,4.9,32.7,B8E0N8,reviewed,N-acetyl-gamma-glutamyl-phosphate reductase (AGPR) (EC 1.2.1.38) (N-acetyl-glutamate semialdehyde dehydrogenase) (NAGSA dehydrogenase),argC Dtur,1786,Dictyoglomus turgidum (strain Z-1310 / DSM 6724)\r", "\r\n", "Ostrea_lur_contig1004,sp,Q9Z1A9,TBCD8_MOUSE,30,180,89,2,753,220,982,1126,5E-17,85.5,Q9Z1A9,reviewed,TBC1 domain family member 8 (BUB2-like protein 1) (Vascular Rab-GAP/TBC-containing protein),Tbc1d8 Hblp1 Vrp,Mus musculus (Mouse),1134\r", "\r\n", "Ostrea_lur_contig1005,sp,P07872,ACOX1_RAT,42.54,228,119,3,2,655,433,658,7E-45,168,P07872,reviewed,\"\"\"Peroxisomal acyl-coenzyme A oxidase 1 (AOX) (EC 1.3.3.6) (Palmitoyl-CoA oxidase) [Cleaved into: Peroxisomal acyl-CoA oxidase 1, A chain; Peroxisomal acyl-CoA oxidase 1, B chain; Peroxisomal acyl-CoA oxidase 1, C chain]\"\"\",Acox1 Acox,Rattus norvegicus (Rat),661\r", "\r\n", "Ostrea_lur_contig1006,sp,Q0V9A9,LACB2_XENTR,31.15,61,36,2,38,217,229,284,5.8,29.3,Q0V9A9,reviewed,Beta-lactamase-like protein 2 (EC 3.-.-.-),lactb2,Xenopus tropicalis (Western clawed frog) (Silurana tropicalis),289\r", "\r\n", "Ostrea_lur_contig1007,sp,Q5RBU8,ROA2_PONAB,55.93,177,78,0,160,690,16,192,5E-66,215,Q5RBU8,reviewed,Heterogeneous nuclear ribonucleoproteins A2/B1 (hnRNP A2/B1),HNRNPA2B1 HNRPA2B1,Pongo abelii (Sumatran orangutan) (Pongo pygmaeus abelii),353\r", "\r\n", 
"Ostrea_lur_contig1008,sp,Q13247,SRSF6_HUMAN,79.66,59,12,0,1303,1127,123,181,5E-22,103,Q13247,reviewed,\"\"\"Serine/arginine-rich splicing factor 6 (Pre-mRNA-splicing factor SRP55) (Splicing factor, arginine/serine-rich 6)\"\"\",SRSF6 SFRS6 SRP55,Homo sapiens (Human),344\r", "\r\n" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "!tr ',' \"\\t\" /Volumes/web/scaphapoda/Grace/Olur_SPID_description_a.tab\n", "!head /Volumes/web/scaphapoda/Grace/Olur_SPID_description_a.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Column1\tColumn2\tColumn3\tColumn4\tColumn5\tColumn6\tColumn7\tColumn8\tColumn9\tColumn10\tColumn11\tColumn12\tColumn13\tColumn14\tSPID\tStatus\tProteinName\tGeneName\tOrganism\tLength\r", "\r\n", "Ostrea_lur_contig1\tsp\tP84175\tRS12_CHICK\t82.4\t125\t22\t0\t418\t44\t8\t132\t7E-71\t216\tP84175\treviewed\t40S ribosomal protein S12\tRPS12\tGallus gallus (Chicken)\t132\r", "\r\n", "Ostrea_lur_contig10\tsp\tP62909\tRS3_RAT\t87.13\t101\t13\t0\t355\t53\t137\t237\t1E-56\t182\tP62909\treviewed\t40S ribosomal protein S3\tRps3\tRattus norvegicus (Rat)\t243\r", "\r\n", "Ostrea_lur_contig100\tsp\tQ3USH5\tSFSWA_MOUSE\t45.76\t236\t102\t4\t143\t772\t73\t308\t9E-34\t143\tQ3USH5\treviewed\t\"\"\"Splicing factor\t suppressor of white-apricot homolog (Splicing factor\t arginine/serine-rich 8) (Suppressor of white apricot protein homolog)\"\"\"\tSfswap Sfrs8 Srsf8 Swap\tMus musculus (Mouse)\t945\r", "\r\n", "Ostrea_lur_contig1001\tsp\tB8E0N8\tARGC_DICTD\t48.39\t31\t16\t0\t394\t486\t10\t40\t4.9\t32.7\tB8E0N8\treviewed\tN-acetyl-gamma-glutamyl-phosphate reductase (AGPR) (EC 1.2.1.38) (N-acetyl-glutamate semialdehyde dehydrogenase) (NAGSA dehydrogenase)\targC Dtur\t1786\tDictyoglomus turgidum (strain Z-1310 / DSM 6724)\r", "\r\n", "Ostrea_lur_contig1004\tsp\tQ9Z1A9\tTBCD8_MOUSE\t30\t180\t89\t2\t753\t220\t982\t1126\t5E-17\t85.5\tQ9Z1A9\treviewed\tTBC1 domain family member 8 
(BUB2-like protein 1) (Vascular Rab-GAP/TBC-containing protein)\tTbc1d8 Hblp1 Vrp\tMus musculus (Mouse)\t1134\r", "\r\n", "Ostrea_lur_contig1005\tsp\tP07872\tACOX1_RAT\t42.54\t228\t119\t3\t2\t655\t433\t658\t7E-45\t168\tP07872\treviewed\t\"\"\"Peroxisomal acyl-coenzyme A oxidase 1 (AOX) (EC 1.3.3.6) (Palmitoyl-CoA oxidase) [Cleaved into: Peroxisomal acyl-CoA oxidase 1\t A chain; Peroxisomal acyl-CoA oxidase 1\t B chain; Peroxisomal acyl-CoA oxidase 1\t C chain]\"\"\"\tAcox1 Acox\tRattus norvegicus (Rat)\t661\r", "\r\n", "Ostrea_lur_contig1006\tsp\tQ0V9A9\tLACB2_XENTR\t31.15\t61\t36\t2\t38\t217\t229\t284\t5.8\t29.3\tQ0V9A9\treviewed\tBeta-lactamase-like protein 2 (EC 3.-.-.-)\tlactb2\tXenopus tropicalis (Western clawed frog) (Silurana tropicalis)\t289\r", "\r\n", "Ostrea_lur_contig1007\tsp\tQ5RBU8\tROA2_PONAB\t55.93\t177\t78\t0\t160\t690\t16\t192\t5E-66\t215\tQ5RBU8\treviewed\tHeterogeneous nuclear ribonucleoproteins A2/B1 (hnRNP A2/B1)\tHNRNPA2B1 HNRPA2B1\tPongo abelii (Sumatran orangutan) (Pongo pygmaeus abelii)\t353\r", "\r\n", "Ostrea_lur_contig1008\tsp\tQ13247\tSRSF6_HUMAN\t79.66\t59\t12\t0\t1303\t1127\t123\t181\t5E-22\t103\tQ13247\treviewed\t\"\"\"Serine/arginine-rich splicing factor 6 (Pre-mRNA-splicing factor SRP55) (Splicing factor\t arginine/serine-rich 6)\"\"\"\tSRSF6 SFRS6 SRP55\tHomo sapiens (Human)\t344\r", "\r\n" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep --color \"growth\" /Volumes/web/scaphapoda/Grace/Olur_SPID_description_a.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1214\tsp\tQ8BJ66\tKAZD1_MOUSE\t39.61\t207\t115\t4\t287\t886\t74\t277\t3E-41\t155\tQ8BJ66\treviewed\tKazal-type serine protease inhibitor domain-containing protein 1 (Bone and odontoblast-expressed protein 1) (Insulin-like \u001b[01;31m\u001b[Kgrowth\u001b[m\u001b[K factor-binding-related protein 10) (IGFBP-rP10) (IGFBP-related protein 10) 
(Insulin-like \u001b[01;31m\u001b[Kgrowth\u001b[m\u001b[K factor-binding-related protein 4)\tKazald1 Bono1 Igfbprp10\tMus musculus (Mouse)\t313\r", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1399\tsp\tO95750\tFGF19_HUMAN\t40.74\t27\t16\t0\t101\t181\t58\t84\t6.7\t30\tO95750\treviewed\tFibroblast \u001b[01;31m\u001b[Kgrowth\u001b[m\u001b[K factor 19 (FGF-19)\tFGF19 UNQ334/PRO533\tHomo sapiens (Human)\t216\r", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig181\tsp\tO14495\tLPP3_HUMAN\t35.19\t54\t35\t0\t2638\t2477\t224\t277\t0.0003\t47.8\tO14495\treviewed\tLipid phosphate phosphohydrolase 3 (EC 3.1.3.4) (PAP2-beta) (Phosphatidate phosphohydrolase type 2b) (Phosphatidic acid phosphatase 2b) (PAP-2b) (PAP2b) (Vascular endothelial \u001b[01;31m\u001b[Kgrowth\u001b[m\u001b[K factor and type I collagen-inducible protein) (VCIP)\tPPAP2B LPP3\tHomo sapiens (Human)\t311\r", "\r\n", "Ostrea_lur_contig637\tsp\tQ9NX58\tLYAR_HUMAN\t45.78\t166\t85\t4\t1045\t551\t1\t162\t1E-41\t155\tQ9NX58\treviewed\tCell \u001b[01;31m\u001b[Kgrowth\u001b[m\u001b[K-regulating nucleolar protein\tLYAR PNAS-5\tHomo sapiens (Human)\t379\r", "\r\n", "Ostrea_lur_contig752\tsp\tP15209\tNTRK2_MOUSE\t69.16\t107\t32\t1\t321\t1\t620\t725\t2E-46\t163\tP15209\treviewed\tBDNF/NT-3 \u001b[01;31m\u001b[Kgrowth\u001b[m\u001b[K factors receptor (EC 2.7.10.1) (GP145-TrkB/GP95-TrkB) (Trk-B) (Neurotrophic tyrosine kinase receptor type 2) (TrkB tyrosine kinase)\tNtrk2 Trkb\tMus musculus (Mouse)\t821\r", "\r\n" ] } ], "prompt_number": 31 }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Getting GOslim terms**\n", "```\n", "SELECT * FROM [graceac9@washington.edu].[olur_blastx_uniprot_2d.tab]olur\n", " left join\n", " [sr320@washington.edu].[SPID and GO Numbers]go\n", " on \n", " olur.Column3=go.SPID\n", " left join \n", " [sr320@washington.edu].[GO_to_GOslim]slim\n", " on\n", " go.GOID=slim.GO_id\n", " where aspect
like 'P'\n", " \n", "``` " ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/scaphapoda/Grace/Olur_goslim_a.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Column1,Column2,Column3,Column4,Column5,Column6,Column7,Column8,Column9,Column10,Column11,Column12,Column13,Column14,SPID,GOID,GO_id,term,GOSlim_bin,aspect\r", "\r\n", "Ostrea_lur_contig1,sp,P84175,RS12_CHICK,82.4,125,22,0,418,44,8,132,7E-71,216,P84175,GO:0006412,GO:0006412,translation,protein metabolism,P\r", "\r\n", "Ostrea_lur_contig10,sp,P62909,RS3_RAT,87.13,101,13,0,355,53,137,237,1E-56,182,P62909,GO:0006412,GO:0006412,translation,protein metabolism,P\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,GO:0006351,GO:0006351,\"\"\"transcription, DNA-dependent\"\"\",RNA metabolism,P\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,GO:0006355,GO:0006355,\"\"\"regulation of transcription, DNA-dependent\"\"\",RNA metabolism,P\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,GO:0006396,GO:0006396,RNA processing,RNA metabolism,P\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,GO:0006397,GO:0006397,mRNA processing,RNA metabolism,P\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,GO:0008380,GO:0008380,RNA splicing,RNA metabolism,P\r", "\r\n", "Ostrea_lur_contig100,sp,Q3USH5,SFSWA_MOUSE,45.76,236,102,4,143,772,73,308,9E-34,143,Q3USH5,GO:0048025,GO:0048025,\"\"\"negative regulation of nuclear mRNA splicing, via spliceosome\"\"\",RNA metabolism,P\r", "\r\n", "Ostrea_lur_contig1001,sp,B8E0N8,ARGC_DICTD,48.39,31,16,0,394,486,10,40,4.9,32.7,B8E0N8,GO:0006520,GO:0006520,cellular amino acid metabolic process,other metabolic processes,P\r", "\r\n" ] } ], 
"prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc -l /Volumes/web/scaphapoda/Grace/Olur_goslim_a.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 5907 /Volumes/web/scaphapoda/Grace/Olur_goslim_a.csv\r\n" ] } ], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep --color \"gonad\" /Volumes/web/scaphapoda/Grace/Olur_goslim_a.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1030,sp,P51592,HYD_DROME,72.22,18,5,0,1,54,2868,2885,0.53,32,P51592,GO:0008585,GO:0008585,female \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig1060,sp,Q07817,B2CL1_HUMAN,29.06,203,125,5,1728,1162,35,232,6E-17,85.1,Q07817,GO:0008584,GO:0008584,male \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig1202,sp,Q8CHN6,SGPL1_RAT,50.09,553,258,8,2999,1374,14,559,0,578,Q8CHN6,GO:0008585,GO:0008585,female \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig169,sp,O60911,CATL2_HUMAN,53.89,334,147,4,30,1016,3,334,5E-121,361,O60911,GO:0008584,GO:0008584,male \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig169,sp,O60911,CATL2_HUMAN,53.89,334,147,4,30,1016,3,334,5E-121,361,O60911,GO:0034698,GO:0034698,response to \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[Kotropin stimulus,other biological processes,P\r", "\r\n", "Ostrea_lur_contig1763,sp,P56705,WNT4_HUMAN,51.97,279,128,4,1455,634,73,350,5E-98,306,P56705,GO:0008584,GO:0008584,male \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig1763,sp,P56705,WNT4_HUMAN,51.97,279,128,4,1455,634,73,350,5E-98,306,P56705,GO:0008585,GO:0008585,female 
\u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig925,sp,O75534,CSDE1_HUMAN,44.44,378,188,11,1,1086,419,790,2E-87,295,O75534,GO:0008584,GO:0008584,male \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig1702,sp,Q7Z3E1,PARPT_HUMAN,39.81,206,115,4,517,1125,450,649,8E-41,157,Q7Z3E1,GO:0008585,GO:0008585,female \u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K development,developmental processes,P\r", "\r\n", "Ostrea_lur_contig264,sp,Q21270,COG6_CAEEL,48.15,27,14,0,202,282,407,433,3.4,30.4,Q21270,GO:0035262,GO:0035262,\u001b[01;31m\u001b[Kgonad\u001b[m\u001b[K morphogenesis,developmental processes,P\r", "\r\n" ] } ], "prompt_number": 36 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Blasting again to try to finish swiss prot" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!/Applications/ncbi-blast-2.2.29+/bin/blastx \\\n", "-query /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt \\\n", "-db /Volumes/web/whale/blast/db/uniprot_sprot \\\n", "-max_target_seqs 1 \\\n", "-max_hsps 1 \\\n", "-outfmt 6 \\\n", "-num_threads 2 \\\n", "-out /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_2.tab" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "#fail\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!/Applications/ncbi-blast-2.2.29+/bin/blastx \\\n", "-query /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt \\\n", "-db /Volumes/web/whale/blast/db/uniprot_sprot \\\n", 
"-max_target_seqs 1 \\\n", "-max_hsps 1 \\\n", "-outfmt 6 \\\n", "-num_threads 2 \\\n", "-out /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_3.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 690 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 59 replaced by X\r\n", "Selenocysteine (U) at position 297 replaced by X\r\n", "Selenocysteine (U) at position 307 replaced by X\r\n", "Selenocysteine (U) at position 338 replaced by X\r\n", "Selenocysteine (U) at position 350 replaced by X\r\n", "Selenocysteine (U) at position 363 replaced by X\r\n", "Selenocysteine (U) at position 365 replaced by X\r\n", "Selenocysteine (U) at position 372 replaced by X\r\n", "Selenocysteine (U) at position 388 replaced by X\r\n", "Selenocysteine (U) at position 390 replaced by X\r\n", "Selenocysteine (U) at position 397 replaced by X\r\n", "Selenocysteine (U) at position 399 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 436 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 52 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 49 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 52 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 46 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", 
"Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 64 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 95 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 93 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 273 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 38 replaced by X\r\n", "Selenocysteine (U) at position 38 replaced by X\r\n", "Selenocysteine (U) at position 48 replaced by X\r\n", "Selenocysteine (U) at position 48 replaced by X\r\n", "Selenocysteine (U) at position 41 replaced by X\r\n" ] } ], "prompt_number": 42 }, { "cell_type": 
"code", "collapsed": false, "input": [ "#runnning on hummingbird\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/scaphapoda/Grace/olur_blastx_uniprot_3.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Ostrea_lur_contig1059\tsp|Q8U3T3|MOAB_PYRFU\t37.84\t37\t22\t1\t1290\t1180\t15\t50\t6.2\t32.7\r\n", "Ostrea_lur_contig1060\tsp|Q07817|B2CL1_HUMAN\t29.06\t203\t125\t5\t1728\t1162\t35\t232\t6e-17\t85.1\r\n", "Ostrea_lur_contig1061\tsp|P80315|TCPD_MOUSE\t76.45\t518\t120\t2\t186\t1736\t21\t537\t0.0\t 822\r\n", "Ostrea_lur_contig1062\tsp|Q00532|CDKL1_HUMAN\t30.80\t289\t163\t8\t1417\t617\t9\t282\t7e-22\t 102\r\n", "Ostrea_lur_contig1063\tsp|B3WEV8|PLSX_LACCB\t30.65\t62\t43\t0\t189\t374\t36\t97\t0.053\t38.9\r\n", "Ostrea_lur_contig1065\tsp|P02637|SCP_MIZYE\t61.36\t176\t66\t2\t43\t567\t2\t176\t3e-71\t 224\r\n", "Ostrea_lur_contig1066\tsp|A6NGW2|STRCL_HUMAN\t25.38\t260\t185\t7\t2737\t3501\t1470\t1725\t0.50\t38.5\r\n", "Ostrea_lur_contig1067\tsp|Q9KU25|NSPS_VIBCH\t50.00\t36\t17\t1\t1610\t1506\t314\t349\t9.2\t33.1\r\n", "Ostrea_lur_contig1068\tsp|Q791B0|UBL5_PSAOB\t87.67\t73\t9\t0\t17\t235\t1\t73\t6e-40\t 137\r\n", "Ostrea_lur_contig1069\tsp|P62264|RS14_MOUSE\t90.00\t140\t13\t1\t502\t86\t1\t140\t3e-88\t 261\r\n" ] } ], "prompt_number": 44 }, { "cell_type": "code", "collapsed": false, "input": [ "#fail again" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 45 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Blast again with blast originating from computer rather than eagle" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep -c \">\" /Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "fgrep: 
/Volumes/web/scaphapoda/Grace/Supp_1_Ostrea_lurida_transcriptome.fasta.txt: No such file or directory\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!/Applications/ncbi-blast-2.2.29+/bin/blastx \\\n", "-query /Users/Shared/blast/query/Supp_1_Ostrea_lurida_transcriptome.fa \\\n", "-db /Users/Shared/blast/db/uniprot_sprot \\\n", "-max_target_seqs 1 \\\n", "-max_hsps 1 \\\n", "-outfmt 6 \\\n", "-num_threads 2 \\\n", "-out /Users/Shared/blast/out/olur_blastx_uniprot_newdb.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 690 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 59 replaced by X\r\n", "Selenocysteine (U) at position 297 replaced by X\r\n", "Selenocysteine (U) at position 307 replaced by X\r\n", "Selenocysteine (U) at position 338 replaced by X\r\n", "Selenocysteine (U) at position 350 replaced by X\r\n", "Selenocysteine (U) at position 363 replaced by X\r\n", "Selenocysteine (U) at position 365 replaced by X\r\n", "Selenocysteine (U) at position 372 replaced by X\r\n", "Selenocysteine (U) at position 388 replaced by X\r\n", "Selenocysteine (U) at position 390 replaced by X\r\n", "Selenocysteine (U) at position 397 replaced by X\r\n", "Selenocysteine (U) at position 399 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 436 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 52 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 49 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine 
(U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 52 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 46 replaced by X\r\n", "Selenocysteine (U) at position 47 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 40 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 64 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 95 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 93 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n", "Selenocysteine (U) at position 95 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 273 replaced by X\r\n" ] } ] }, { 
"cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }