{ "metadata": { "name": "MG_bedtools" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "raw", "metadata": {}, "source": "MG: I would like to know how many methylated CGs are in promoter regions (first file) and TEs (second file). I am trying to use intersectBed to tell me this\ufeff" }, { "cell_type": "raw", "metadata": {}, "source": "Files\nhttp://eagle.fish.washington.edu/bivalvia/1kbupstream_promoter_v9.bed\n and\nhttp://eagle.fish.washington.edu/bivalvia/TE2.bed\n " }, { "cell_type": "markdown", "metadata": {}, "source": "" }, { "cell_type": "raw", "metadata": {}, "source": "Note the image above was rendered by\n" }, { "cell_type": "raw", "metadata": {}, "source": "Going to save both as unix, in cnidarian\n" }, { "cell_type": "code", "collapsed": false, "input": "cd /Volumes/Bay3/Software/bedtools-2.17.0", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "/Volumes/Bay3/Software/bedtools-2.17.0\n" } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": "ls", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "LICENSE RELEASE_HISTORY \u001b[34mdocs\u001b[m\u001b[m/ \u001b[34mscripts\u001b[m\u001b[m/\r\nMakefile \u001b[34mbin\u001b[m\u001b[m/ \u001b[34mgenomes\u001b[m\u001b[m/ \u001b[34msrc\u001b[m\u001b[m/\r\nREADME.rst \u001b[34mdata\u001b[m\u001b[m/ \u001b[34mobj\u001b[m\u001b[m/ \u001b[34mtest\u001b[m\u001b[m/\r\n" } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": "cd bin", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "/Volumes/Bay3/Software/bedtools-2.17.0/bin\n" } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": "ls", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "\u001b[31mannotateBed\u001b[m\u001b[m* \u001b[31mcomplementBed\u001b[m\u001b[m* \u001b[31mmapBed\u001b[m\u001b[m* \u001b[31mslopBed\u001b[m\u001b[m*\r\n\u001b[31mbamToBed\u001b[m\u001b[m* \u001b[31mcoverageBed\u001b[m\u001b[m* \u001b[31mmaskFastaFromBed\u001b[m\u001b[m* \u001b[31msortBed\u001b[m\u001b[m*\r\n\u001b[31mbamToFastq\u001b[m\u001b[m* \u001b[31mexpandCols\u001b[m\u001b[m* \u001b[31mmergeBed\u001b[m\u001b[m* \u001b[31msubtractBed\u001b[m\u001b[m*\r\n\u001b[31mbed12ToBed6\u001b[m\u001b[m* \u001b[31mfastaFromBed\u001b[m\u001b[m* \u001b[31mmultiBamCov\u001b[m\u001b[m* \u001b[31mtagBam\u001b[m\u001b[m*\r\n\u001b[31mbedToBam\u001b[m\u001b[m* \u001b[31mflankBed\u001b[m\u001b[m* \u001b[31mmultiIntersectBed\u001b[m\u001b[m* \u001b[31munionBedGraphs\u001b[m\u001b[m*\r\n\u001b[31mbedToIgv\u001b[m\u001b[m* \u001b[31mgenomeCoverageBed\u001b[m\u001b[m* \u001b[31mnucBed\u001b[m\u001b[m* \u001b[31mwindowBed\u001b[m\u001b[m*\r\n\u001b[31mbedpeToBam\u001b[m\u001b[m* \u001b[31mgetOverlap\u001b[m\u001b[m* \u001b[31mpairToBed\u001b[m\u001b[m* \u001b[31mwindowMaker\u001b[m\u001b[m*\r\n\u001b[31mbedtools\u001b[m\u001b[m* \u001b[31mgroupBy\u001b[m\u001b[m* \u001b[31mpairToPair\u001b[m\u001b[m*\r\n\u001b[31mclosestBed\u001b[m\u001b[m* \u001b[31mintersectBed\u001b[m\u001b[m* \u001b[31mrandomBed\u001b[m\u001b[m*\r\n\u001b[31mclusterBed\u001b[m\u001b[m* \u001b[31mlinksBed\u001b[m\u001b[m* \u001b[31mshuffleBed\u001b[m\u001b[m*\r\n" } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": "run intersectBed -h", "language": "python", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (intersectBed, line 2)", "output_type": "pyerr", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"/Volumes/Bay3/Software/bedtools-2.17.0/bin/intersectBed\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m ${0%/*}/bedtools intersect \"$@\"\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": "./intersectBed -h", "language": "python", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 1)", "output_type": "pyerr", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m ./intersectBed -h\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": "intersectBed", "language": "python", "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'intersectBed' is not defined", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mintersectBed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'intersectBed' is not defined" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": "$intersectBed -h", "language": "python", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 1)", "output_type": "pyerr", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m $intersectBed -h\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": "cp /Volumes/Bay3/Software/bedtools-2.17.0/bin/* /usr/local/bin", "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": "!intersectBed -h", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "\r\nTool: bedtools intersect (aka intersectBed)\r\nVersion: v2.17.0\r\nSummary: Report overlaps between two feature files.\r\n\r\nUsage: bedtools intersect [OPTIONS] -a -b \r\n\r\nOptions: \r\n\t-abam\tThe A input file is in BAM format. Output will be BAM as well.\r\n\r\n\t-ubam\tWrite uncompressed BAM output. Default writes compressed BAM.\r\n\r\n\t-bed\tWhen using BAM input (-abam), write output as BED. The default\r\n\t\tis to write output in BAM when using -abam.\r\n\r\n\t-wa\tWrite the original entry in A for each overlap.\r\n\r\n\t-wb\tWrite the original entry in B for each overlap.\r\n\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r.\r\n\r\n\t-loj\tPerform a \"left outer join\". That is, for each feature in A\r\n\t\treport each overlap with B. If no overlaps are found, \r\n\t\treport a NULL feature for B.\r\n\r\n\t-wo\tWrite the original A and B entries plus the number of base\r\n\t\tpairs of overlap between the two features.\r\n\t\t- Overlaps restricted by -f and -r.\r\n\t\t Only A features with overlap are reported.\r\n\r\n\t-wao\tWrite the original A and B entries plus the number of base\r\n\t\tpairs of overlap between the two features.\r\n\t\t- Overlapping features restricted by -f and -r.\r\n\t\t However, A features w/o overlap are also reported\r\n\t\t with a NULL B feature and overlap = 0.\r\n\r\n\t-u\tWrite the original A entry _once_ if _any_ overlaps found in B.\r\n\t\t- In other words, just report the fact >=1 hit was found.\r\n\t\t- Overlaps restricted by -f and -r.\r\n\r\n\t-c\tFor each entry in A, report the number of overlaps with B.\r\n\t\t- Reports 0 for A entries that have no overlap with B.\r\n\t\t- Overlaps restricted by -f and -r.\r\n\r\n\t-v\tOnly report those entries in A that have _no overlaps_ with B.\r\n\t\t- Similar to \"grep -v\" (an homage).\r\n\r\n\t-f\tMinimum overlap required as a fraction of A.\r\n\t\t- Default is 1E-9 (i.e., 1bp).\r\n\t\t- FLOAT (e.g. 0.50)\r\n\r\n\t-r\tRequire that the fraction overlap be reciprocal for A and B.\r\n\t\t- In other words, if -f is 0.90 and -r is used, this requires\r\n\t\t that B overlap 90% of A and A _also_ overlaps 90% of B.\r\n\r\n\t-s\tRequire same strandedness. That is, only report hits in B\r\n\t\tthat overlap A on the _same_ strand.\r\n\t\t- By default, overlaps are reported without respect to strand.\r\n\r\n\t-S\tRequire different strandedness. That is, only report hits in B\r\n\t\tthat overlap A on the _opposite_ strand.\r\n\t\t- By default, overlaps are reported without respect to strand.\r\n\r\n\t-split\tTreat \"split\" BAM or BED12 entries as distinct BED intervals.\r\n\r\n\t-sorted\tUse the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input\r\n\r\n\t-header\tPrint the header from the A file prior to results.\r\n\r\nNotes: \r\n\t(1) When a BAM file is used for the A file, the alignment is retained if overlaps exist,\r\n\tand exlcuded if an overlap cannot be found. If multiple overlaps exist, they are not\r\n\treported, as we are only testing for one or more overlaps.\r\n\r\n" } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": "!intersectBed -c -a /Volumes/web/bivalvia/hevilymethgill.bed -b /Volumes/web/cnidarian/1kbupstream_promoter_v9_fixed> /Volumes/web/cnidarian/MG_intersect_hevy_promoter", "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": "!head /Volumes/web/cnidarian/MG_intersect_hevy_promoter", "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": "C1009\t89\t90\tCG\t0.833\t0\r\nC10093\t107\t108\tCG\t0.875\t0\r\nC10137\t13\t14\tCG\t0.910\t0\r\nC10137\t18\t19\tCG\t0.947\t0\r\nC10137\t30\t31\tCG\t0.906\t0\r\nC10137\t39\t40\tCG\t0.885\t0\r\nC10333\t92\t93\tCG\t0.857\t0\r\nC10333\t94\t95\tCG\t1.000\t0\r\nC10411\t112\t113\tCG\t0.857\t0\r\nC10429\t75\t76\tCG\t0.944\t0\r\n" } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": "", "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }