{
 "metadata": {
  "name": "",
  "signature": "sha256:1640e5fc807b93556e66f3c7bb5522d1f365b815e12bfe0b2c9a28ebf78424c7"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "In the last several tries, this workflow got stuck somewhere along the way and did not run all the way to the end of the commands on its own. Here's another try at that."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#works well when choose \"Run All\" option under \"Cell\" tab."
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 44
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Setup\n",
      "\n",
      "All imports and every value a reader might need to change live in the next two cells."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import re\n",
      "\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Working directory, and the short prefix used to name this run's outputs\n",
      "# ({dircode}_uniprot, {dircode}_descriptions.txt, {dircode}_reprot.txt).\n",
      "wd=\"/Volumes/web/scaphapoda/Grace/Transcriptomes/mer_tst\"\n",
      "dircode=\"me\"\n",
      "\n",
      "# Machine-specific locations of the BLAST database and the SQLShare client scripts.\n",
      "blast_db=\"/Volumes/Data/blast_db/uniprot_sprot\"\n",
      "sqlshare_tools=\"/Applications/sqlshare-pythonclient-master/tools\"\n",
      "\n",
      "# Keyword alternation used both by egrep (to filter rows) and by the chart\n",
      "# (to label each row), so the two can never drift apart.\n",
      "repro_terms=\"male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation|penile|prostate|vulval\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%cd {wd}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/Volumes/web/scaphapoda/Grace/Transcriptomes/mer_tst\n"
       ]
      }
     ],
     "prompt_number": 23
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## BLAST the query against `uniprot_sprot`"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastx \\\n",
      "-query query.fa \\\n",
      "-db {blast_db} \\\n",
      "-max_target_seqs 1 \\\n",
      "-max_hsps 1 \\\n",
      "-outfmt 6 \\\n",
      "-num_threads 8 \\\n",
      "-out blast_sprot.tab"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!say hello"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 25
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!wc -l blast_sprot.tab"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 35 blast_sprot.tab\r\n"
       ]
      }
     ],
     "prompt_number": 26
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# tr only reads stdin and writes stdout, so the input and output files must be\n",
      "# wired up with redirection. This turns the sp|ACCESSION|NAME field into\n",
      "# three tab-separated columns.\n",
      "!tr '|' \"\t\" < blast_sprot.tab > blast_sprot_sql.tab"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head blast_sprot_sql.tab"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Mmercenaria_Contig_1\tsp\tP06538\tDPOL_ADE12\t26.09\t46\t34\t0\t141\t4\t100\t145\t6.2\t28.5\r\n",
        "Mmercenaria_Contig_2\tsp\tQ6DRI1\tEI3EA_DANRE\t75.00\t68\t17\t0\t5\t208\t114\t181\t2e-29\t 112\r\n",
        "Mmercenaria_Contig_3\tsp\tO94823\tAT10B_HUMAN\t61.11\t18\t7\t0\t162\t215\t99\t116\t2.2\t29.6\r\n",
        "Mmercenaria_Contig_5\tsp\tP0A5H8\tEFPP_MYCTU\t63.16\t19\t7\t0\t117\t61\t20\t38\t0.64\t31.2\r\n",
        "Mmercenaria_Contig_6\tsp\tQ9WU60\tATRN_MOUSE\t28.85\t52\t33\t1\t168\t13\t808\t855\t0.12\t33.9\r\n",
        "Mmercenaria_Contig_8\tsp\tP18547\tVNCS_PAVPN\t50.00\t22\t11\t0\t111\t176\t362\t383\t0.85\t30.8\r\n",
        "Mmercenaria_Contig_9\tsp\tA8WGF4\tIF122_XENTR\t67.16\t67\t22\t0\t1\t201\t894\t960\t6e-24\t99.4\r\n",
        "Mmercenaria_Contig_10\tsp\tQ4QK86\tMUKB_HAEI8\t29.79\t47\t33\t0\t16\t156\t262\t308\t1.6\t30.0\r\n",
        "Mmercenaria_Contig_11\tsp\tQ0AQ76\tTHIG_MARMM\t34.09\t44\t27\t1\t210\t79\t84\t125\t3.4\t28.9\r\n",
        "Mmercenaria_Contig_12\tsp\tP15106\tGLNA_STRCO\t39.29\t28\t17\t0\t40\t123\t124\t151\t0.84\t30.8\r\n"
       ]
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Upload the hits to SQLShare and join them to the GO / GO Slim tables"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!python {sqlshare_tools}/singleupload.py \\\n",
      "-d {dircode}_uniprot \\\n",
      "blast_sprot_sql.tab"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "processing chunk line 0 to 35 (0.000410795211792 s elapsed)\r\n",
        "pushing blast_sprot_sql.tab...\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "parsing 19413EC4...\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "finished me_uniprot\r\n"
       ]
      }
     ],
     "prompt_number": 29
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# The table queried here is the one uploaded in the previous cell, so its name\n",
      "# is built from dircode rather than hard-coded.\n",
      "!python {sqlshare_tools}/fetchdata.py \\\n",
      "-s \"SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[{dircode}_uniprot]me left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on me.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on me.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'\" \\\n",
      "-f tsv \\\n",
      "-o {dircode}_descriptions.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head {dircode}_descriptions.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Column1\tterm\tGOSlim_bin\taspect\tProteinName\r",
        "\r\n",
        "Mmercenaria_Contig_31\tpolysaccharide catabolic process\tother metabolic processes\tP\t\"Endoglucanase B (EC 3.2.1.4) (Cellulase B) (Endo-1,4-beta-glucanase B) (EG-B)\"\r",
        "\r\n",
        "Mmercenaria_Contig_35\tdouble-strand break repair via homologous recombination\tstress response\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n",
        "Mmercenaria_Contig_35\tdouble-strand break repair via homologous recombination\tDNA metabolism\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n",
        "Mmercenaria_Contig_35\toocyte maturation\tother biological processes\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n",
        "Mmercenaria_Contig_36\tregulation of cell growth\tother biological processes\tP\tSuppressor of cytokine signaling 2 (SOCS-2) (Cytokine-inducible SH2 protein 2) (CIS-2) (STAT-induced STAT inhibitor 2) (SSI-2)\r",
        "\r\n",
        "Mmercenaria_Contig_35\tinner cell mass cell proliferation\tcell cycle and proliferation\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n",
        "Mmercenaria_Contig_35\tinner cell mass cell proliferation\tdevelopmental processes\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n",
        "Mmercenaria_Contig_42\t\"complement activation, lectin pathway\"\tstress response\tP\tMannose-binding protein C (MBP-C) (MBP1) (Mannan-binding protein) (Mannose-binding lectin)\r",
        "\r\n",
        "Mmercenaria_Contig_42\t\"complement activation, lectin pathway\"\tprotein metabolism\tP\tMannose-binding protein C (MBP-C) (MBP1) (Mannan-binding protein) (Mannose-binding lectin)\r",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 31
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Keep only the reproduction-related annotations"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!egrep --color \"{repro_terms}\" {dircode}_descriptions.txt > {dircode}_reprot.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!wc -l {dircode}_reprot.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 2 me_reprot.txt\r\n"
       ]
      }
     ],
     "prompt_number": 35
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head {dircode}_reprot.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Mmercenaria_Contig_35\tmale meiosis I\tcell cycle and proliferation\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n",
        "Mmercenaria_Contig_35\tfemale gonad development\tdevelopmental processes\tP\tBreast cancer type 2 susceptibility protein (Fanconi anemia group D1 protein)\r",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 41
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Chart the hits per egrep keyword"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# The header line of the descriptions file contains none of the egrep keywords,\n",
      "# so the filtered file has no header row. The column names are therefore given\n",
      "# explicitly; otherwise pandas would consume the first hit as the header.\n",
      "jslim = pd.read_table(dircode + \"_reprot.txt\",  # name of the data file\n",
      "                      header=None,\n",
      "                      names=[\"Column1\", \"term\", \"GOSlim_bin\", \"aspect\", \"ProteinName\"],\n",
      "                      na_values=[\"\", \" \"])  # what values should be considered \"blank\" values?"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "jslim.head()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "To group by \"egrep term\", each row is labelled with the keyword that made egrep keep it. egrep matches anywhere on the line, so the whole row is searched, not only the `term` column. A row containing several keywords is labelled with the one that occurs first in the row."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "repro_pattern = re.compile(repro_terms)\n",
      "\n",
      "\n",
      "def first_repro_keyword(row):\n",
      "    \"\"\"Return the first keyword from repro_terms found anywhere in the row, or None.\"\"\"\n",
      "    hit = repro_pattern.search(\"\\t\".join(str(value) for value in row))\n",
      "    return hit.group(0) if hit else None"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# groupby accepts a Series aligned with the frame's index, so the keyword\n",
      "# labels do not need to be stored as a column of jslim.\n",
      "egrep_term = jslim.apply(first_repro_keyword, axis=1)\n",
      "\n",
      "ax = jslim.groupby(egrep_term).Column1.count().plot(kind='bar')\n",
      "ax.set_xlabel(\"egrep keyword\")\n",
      "ax.set_ylabel(\"number of annotation rows\");"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}