{
"metadata": {
"name": "BiGo_larvae_methylkit"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": "Converting methratio files for methylkit analysis"
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": "REDUX- New reformatting (11/2013)"
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "M1"
},
{
"cell_type": "raw",
"metadata": {},
"source": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_M1] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Users/sr320/Desktop/test.csv"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "```sql\nSELECT \n chr as chr,\n pos as start,\n '+' as strand,\n cast (CT_count as float) as CT_count,\n cast (C_count as float) as C_count,\n cast (C_count as float) / cast (CT_count as float) as freqC,\n 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT\n \nFROM [sr320@washington.edu].[BiGo_lar_M1]\n where \ncontext like '__CG_'\nand\n CT_Count >= 5 \nand \n ratio <> 'NA'\u200b\n``` "
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,C_count,freqC,freqT\r\r\nC10295,51,+,5,0,0,1\r\r\nC11141,58,+,5,0,0,1\r\r\nC11141,73,+,5,0,0,1\r\r\nC11141,78,+,5,0,0,1\r\r\nC11848,108,+,6,0,0,1\r\r\nC12768,103,+,6,1,0.166666666666667,0.833333333333333\r\r\nC13766,126,+,9,0,0,1\r\r\nC13766,145,+,8,0,0,1\r\r\nC14220,96,+,7,0,0,1\r\r\n"
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_input.csv > /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_c.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_c.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,51,+,5,0,0,1\r\r\nC11141,58,+,5,0,0,1\r\r\nC11141,73,+,5,0,0,1\r\r\nC11141,78,+,5,0,0,1\r\r\nC11848,108,+,6,0,0,1\r\r\nC12768,103,+,6,1,0.166666666666667,0.833333333333333\r\r\nC13766,126,+,9,0,0,1\r\r\nC13766,145,+,8,0,0,1\r\r\nC14220,96,+,7,0,0,1\r\r\nC14220,143,+,6,0,0,1\r\r\n"
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tr ',' \"\\t\" /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2.txt",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2.txt",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295\t51\t+\t5\t0\t0\t1\r\r\nC11141\t58\t+\t5\t0\t0\t1\r\r\nC11141\t73\t+\t5\t0\t0\t1\r\r\nC11141\t78\t+\t5\t0\t0\t1\r\r\nC11848\t108\t+\t6\t0\t0\t1\r\r\nC12768\t103\t+\t6\t1\t0.166666666666667\t0.833333333333333\r\r\nC13766\t126\t+\t9\t0\t0\t1\r\r\nC13766\t145\t+\t8\t0\t0\t1\r\r\nC14220\t96\t+\t7\t0\t0\t1\r\r\nC14220\t143\t+\t6\t0\t0\t1\r\r\n"
}
],
"prompt_number": 9
},
{
"cell_type": "heading",
"level": 5,
"metadata": {},
"source": "Hack to get unique ID"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit3_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr_start,chr,start,strand,CT_count,C_count,freqC,freqT\r\r\nC10295_51,C10295,51,+,5,0,0,1\r\r\nC11141_58,C11141,58,+,5,0,0,1\r\r\nC11141_73,C11141,73,+,5,0,0,1\r\r\nC11141_78,C11141,78,+,5,0,0,1\r\r\nC11848_108,C11848,108,+,6,0,0,1\r\r\nC12768_103,C12768,103,+,6,1,0.166666666666667,0.833333333333333\r\r\nC13766_126,C13766,126,+,9,0,0,1\r\r\nC13766_145,C13766,145,+,8,0,0,1\r\r\nC14220_96,C14220,96,+,7,0,0,1\r\r\n"
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": "python tool to download hack"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_M1] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_M1_methylkit4_input.txt\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/Monarch/cnidary/BiGo_lar_M1_methylkit4_input.txt",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr_start\tchr\tstart\tstrand\tCT_count\tC_count\tfreqC\tfreqT\r\r\nC10295_51\tC10295\t51\t+\t5\t0\t0\t1\r\r\nC11141_58\tC11141\t58\t+\t5\t0\t0\t1\r\r\nC11141_73\tC11141\t73\t+\t5\t0\t0\t1\r\r\nC11141_78\tC11141\t78\t+\t5\t0\t0\t1\r\r\nC11848_108\tC11848\t108\t+\t6\t0\t0\t1\r\r\nC12768_103\tC12768\t103\t+\t6\t1\t0.166666666666667\t0.833333333333333\r\r\nC13766_126\tC13766\t126\t+\t9\t0\t0\t1\r\r\nC13766_145\tC13766\t145\t+\t8\t0\t0\t1\r\r\nC14220_96\tC14220\t96\t+\t7\t0\t0\t1\r\r\n"
}
],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "T1D3"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T1D3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T1D3_methylkit4_input.txt\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "T1D5"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T1D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T1D5_methylkit4_input.txt\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "M3"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_M3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_M3_methylkit4_input.txt\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "T3D3"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T3D3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T3D3_methylkit4_input.txt\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "T3D5"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T3D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T3D5_methylkit4_input.txt\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "markdown",
"metadata": {},
"source": "In excel and textwrangler made a list of non_redundant locis from all 6 files"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "Left Join in SQLshare on non_redundant ID"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "```sql\nSELECT * \nFROM [sr320@washington.edu].[_BiGo_lar_nonred_ID.txt]id\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_M1_hack]M1\nON id.[chr_start]=M1.[chr_start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D3_hack]T1D3\nON id.[chr_start]=T1D3.[chr_start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D5_hack]T1D5\nON id.[chr_start]=T1D5.[chr_start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_M3_hack]M3\nON id.[chr_start]=M3.[chr_start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D3_hack]T3D3\nON id.[chr_start]=T3D3.[chr_start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D5_hack]T3D5\nON id.[chr_start]=T3D5.[chr_start]\n```"
},
{
"cell_type": "raw",
"metadata": {},
"source": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -d \"[sr320@washington.edu].[BiGo_Larvae_joineddata]\" -o /Volumes/Monarch/cnidary/BiGo_lar_joineddata.csv\n"
},
{
"cell_type": "code",
"collapsed": false,
"input": "python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -d \"[sr320@washington.edu].[Snapshot of BiGo_Larvae_joineddata]\" -o /Volumes/Monarch/cnidary/BiGo_lar_joineddata3.csv\n",
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "Re ReDux 11/6/2013"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "Will try double column join ? as per Dan's Suggestion"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "```sql\nSELECT * \nFROM [sr320@washington.edu].[BiGo_lar_nonred_ID split]id\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_M1_hack]M1\nON id.[chr]=M1.[chr] AND id.[start]=M1.[start]\u200b\n```"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "this worked fine.."
},
{
"cell_type": "code",
"collapsed": false,
"input": "SELECT * \nFROM [sr320@washington.edu].[BiGo_lar_nonred_ID split]id\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_M1_hack]M1\nON id.[chr]=M1.[chr] AND id.[start]=M1.[start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D3_hack]T1D3\nON id.[chr]=T1D3.[chr] AND id.[start]=T1D3.[start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D5_hack]T1D5\nON id.[chr]=T1D5.[chr] AND id.[start]=T1D5.[start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_M3_hack]M3\nON id.[chr]=M3.[chr] AND id.[start]=M3.[start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D3_hack]T3D3\nON id.[chr]=T3D3.[chr] AND id.[start]=T3D3.[start]\nLEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D5_hack]T3D5\nON id.[chr]=T3D5.[chr] AND id.[start]=T3D5.[start]",
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": "![im](files/img/BiGo_lar_joineddata.jpg)\n",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "/bin/sh: -c: line 0: syntax error near unexpected token `files/img/BiGo_lar_joineddata.jpg'\r\n/bin/sh: -c: line 0: `[im](files/img/BiGo_lar_joineddata.jpg)'\r\n"
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": "![pearl](files/img/pearl.jpeg)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "/bin/sh: -c: line 0: syntax error near unexpected token `files/img/pearl.jpeg'\r\n/bin/sh: -c: line 0: `[pearl](files/img/pearl.jpeg)'\r\n"
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": ""
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/Monarch/cnidary/BiGo_lar_lociALL0.txt",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr\tstart\tchr_start\tCountingCs\tSummingCs\r\nC16318\t177\tC16318_177\t6\t0\r\nC16318\t196\tC16318_196\t6\t0\r\nC16318\t203\tC16318_203\t6\t0\r\nC16318\t302\tC16318_302\t6\t0\r\nC19344\t310\tC19344_310\t6\t0\r\nC19356\t52\tC19356_52\t6\t0\r\nC19356\t107\tC19356_107\t6\t0\r\nC19356\t120\tC19356_120\t6\t0\r\nC19356\t123\tC19356_123\t6\t0\r\n"
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": "!wc /Volumes/Monarch/cnidary/BiGo_lar_lociALL0.txt",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": " 3230 16155 130260 /Volumes/Monarch/cnidary/BiGo_lar_lociALL0.txt\r\n"
}
],
"prompt_number": 14
},
{
"cell_type": "markdown",
"metadata": {},
"source": "Up into SQLshare and generate file where loci covered by all 6 datasets and > 0 C_count\n\n```\nSELECT * FROM [sr320@washington.edu].[_BiGo_lar_joineddata_sums.csv]\n where CountingCs = 6\n and\n SummingCs > 0\n```"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/Monarch/cnidary/BiGo_lar_joineddata_mk.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,chr_start,CountingCs,SummingCs,chr1,start1,strand,CT_count,C_count,freqC,freqT,chr_start1,chr2,start2,strand1,CT_count1,C_count1,freqC1,freqT1,chr_start2,chr3,start3,strand2,CT_count2,C_count2,freqC2,freqT2,chr_start3,chr4,start4,strand3,CT_count3,C_count3,freqC3,freqT3,chr_start4,chr5,start5,strand4,CT_count4,C_count4,freqC4,freqT4,chr_start5,chr6,start6,strand5,CT_count5,C_count5,freqC5,freqT5\r\r\nC16318,206,C16318_206,6,1,C16318,206,+,7,0,0,1,C16318_206,C16318,206,+,9,0,0,1,C16318_206,C16318,206,+,7,1,0.142857143,0.857142857,C16318_206,C16318,206,+,12,0,0,1,C16318_206,C16318,206,+,25,0,0,1,C16318_206,C16318,206,+,23,0,0,1\r\r\nC19576,631,C19576_631,6,16,C19576,631,+,35,5,0.142857143,0.857142857,C19576_631,C19576,631,+,16,3,0.1875,0.8125,C19576_631,C19576,631,+,7,1,0.142857143,0.857142857,C19576_631,C19576,631,+,19,3,0.157894737,0.842105263,C19576_631,C19576,631,+,18,1,0.055555556,0.944444444,C19576_631,C19576,631,+,19,3,0.157894737,0.842105263\r\r\nC19576,637,C19576_637,6,29,C19576,637,+,37,8,0.216216216,0.783783784,C19576_637,C19576,637,+,14,6,0.428571429,0.571428571,C19576_637,C19576,637,+,7,1,0.142857143,0.857142857,C19576_637,C19576,637,+,20,6,0.3,0.7,C19576_637,C19576,637,+,16,2,0.125,0.875,C19576_637,C19576,637,+,19,6,0.315789474,0.684210526\r\r\nC19576,403,C19576_403,6,5,C19576,403,+,35,0,0,1,C19576_403,C19576,403,+,12,0,0,1,C19576_403,C19576,403,+,15,0,0,1,C19576_403,C19576,403,+,28,3,0.107142857,0.892857143,C19576_403,C19576,403,+,22,1,0.045454545,0.954545455,C19576_403,C19576,403,+,16,1,0.0625,0.9375\r\r\nC19902,815,C19902_815,6,193,C19902,815,+,81,51,0.62962963,0.37037037,C19902_815,C19902,815,+,48,31,0.645833333,0.354166667,C19902_815,C19902,815,+,31,19,0.612903226,0.387096774,C19902_815,C19902,815,+,47,30,0.638297872,0.361702128,C19902_815,C19902,815,+,64,34,0.53125,0.46875,C19902_815,C19902,815,+,49,28,0.571428571,0.428571429\r\r\nC19902,83,C19902_83,6,314,C19902,83,+,112,73,0.651785714,0.348214286,C19902_83,C19902,83,+,62,39,0.629032258,0.370967742,C19902_83,C19902,83,+,51,27,0.529411765,0.470588235,C19902_83,C19902,83,+,72,39,0.541666667,0.458333333,C19902_83,C19902,83,+,110,73,0.663636364,0.336363636,C19902_83,C19902,83,+,95,63,0.663157895,0.336842105\r\r\nC19902,141,C19902_141,6,376,C19902,141,+,102,74,0.725490196,0.274509804,C19902_141,C19902,141,+,71,53,0.746478873,0.253521127,C19902_141,C19902,141,+,55,38,0.690909091,0.309090909,C19902_141,C19902,141,+,66,47,0.712121212,0.287878788,C19902_141,C19902,141,+,125,93,0.744,0.256,C19902_141,C19902,141,+,98,71,0.724489796,0.275510204\r\r\nC19902,775,C19902_775,6,376,C19902,775,+,137,95,0.693430657,0.306569343,C19902_775,C19902,775,+,94,67,0.712765957,0.287234043,C19902_775,C19902,775,+,56,37,0.660714286,0.339285714,C19902_775,C19902,775,+,66,45,0.681818182,0.318181818,C19902_775,C19902,775,+,112,72,0.642857143,0.357142857,C19902_775,C19902,775,+,85,60,0.705882353,0.294117647\r\r\nC19902,781,C19902_781,6,327,C19902,781,+,123,72,0.585365854,0.414634146,C19902_781,C19902,781,+,85,48,0.564705882,0.435294118,C19902_781,C19902,781,+,56,36,0.642857143,0.357142857,C19902_781,C19902,781,+,64,45,0.703125,0.296875,C19902_781,C19902,781,+,105,72,0.685714286,0.314285714,C19902_781,C19902,781,+,82,54,0.658536585,0.341463415\r\r\n"
}
],
"prompt_number": 15
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": "Creation of new tab delimited text files with following structure \n
\n`chr\tstart\tstrand\tCT_count\tC_count\tfreqC\tfreqT`"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "These files only contain loci covered in all 6 files and at least 1 mCpG"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "\n\n\n\n\n\n\n\n\n\n"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "---"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "**Quicklist** old \nM3 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M3_methylkit.csv` \n \nT3D3 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T3D3_methylkit.csv` \n\nT3D5 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T3D5_methylkit.csv` \n\nM1 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M1_methylkit.csv` \n\nT1D3 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T1D3_methylkit.csv`\n\nT1D5 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T1D5_methylkit.csv` \n\n---\n**QC** \nM3 \n`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M3_methylkit_oner.csv`\n\nBiGo(gonad) \n`http://eagle.fish.washington.edu/cnidarian/BiGO_betty_plain_methylkit.csv`"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGO_betty_plain_methratio_v1.txt] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGO_betty_plain_methylkit_input.csv\n\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGO_betty_plain_methylkit_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,C_count,freqC\r\r\nC10005,57,+,52,0,0\r\r\nC10009,70,+,28,0,0\r\r\nC10009,124,+,13,0,0\r\r\nC10009,128,+,13,0,0\r\r\nC10009,136,+,9,0,0\r\r\nC10011,51,+,14,0,0\r\r\nC10011,62,+,17,0,0\r\r\nC10011,101,+,21,0,0\r\r\nC10011,108,+,21,0,0\r\r\n"
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGO_betty_plain_methylkit_input.csv > /Volumes/web/cnidarian/BiGO_betty_plain_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGO_betty_plain_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10005,57,+,52,0,0\r\r\nC10009,70,+,28,0,0\r\r\nC10009,124,+,13,0,0\r\r\nC10009,128,+,13,0,0\r\r\nC10009,136,+,9,0,0\r\r\nC10011,51,+,14,0,0\r\r\nC10011,62,+,17,0,0\r\r\nC10011,101,+,21,0,0\r\r\nC10011,108,+,21,0,0\r\r\nC10011,115,+,19,0,0\r\r\n"
}
],
"prompt_number": 4
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "M3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "```sql\nSELECT \n chr as chr,\n pos as start,\n '+' as strand,\n cast (CT_count as float) as CT_count,\n cast (C_count as float) as C_count\nFROM [sr320@washington.edu].[BiGo_lar_M3]\n where \ncontext like '__CG_'\nand\n CT_Count >= 5 \nand \n ratio <> 'NA'\n```"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "```\nSELECT \n chr as chr,\n start as start,\n strand as strand,\n CT_count as CT_count,\n C_count as C_count,\n C_count/CT_count as freqC \n \n \n FROM [sr320@washington.edu].[_M3methylkit_s1]\u200b\n``` "
},
{
"cell_type": "markdown",
"metadata": {},
"source": "```sql\nSELECT \n chr as chr,\n start as start,\n strand as strand,\n CT_count as CT_count,\n freqC as freqC\n \n FROM [sr320@washington.edu].[_M3mehthykit_step2]\u200b\n```"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,freqC\r\r\nC10295,38,+,15,0\r\r\nC10295,51,+,21,0\r\r\nC10295,142,+,9,0\r\r\nC10845,110,+,6,0\r\r\nC10845,145,+,6,0\r\r\nC11248,71,+,5,0\r\r\nC11870,109,+,6,0\r\r\nC12272,65,+,5,1\r\r\nC14220,96,+,6,0\r\r\n"
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,38,+,15,0\r\r\nC10295,51,+,21,0\r\r\nC10295,142,+,9,0\r\r\nC10845,110,+,6,0\r\r\nC10845,145,+,6,0\r\r\nC11248,71,+,5,0\r\r\nC11870,109,+,6,0\r\r\nC12272,65,+,5,1\r\r\nC14220,96,+,6,0\r\r\nC14450,60,+,6,0.333333333333333\r\r\n"
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "!wc /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": " 182475 182475 5044686 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv\r\n"
}
],
"prompt_number": 18
},
{
"cell_type": "markdown",
"metadata": {},
"source": ""
},
{
"cell_type": "code",
"collapsed": false,
"input": "#with one line of code... \n!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input_oner.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,C_count,freqC\r\r\nC10295,38,+,15,0,0\r\r\nC10295,51,+,21,0,0\r\r\nC10295,142,+,9,0,0\r\r\nC10845,110,+,6,0,0\r\r\nC10845,145,+,6,0,0\r\r\nC11248,71,+,5,0,0\r\r\nC11870,109,+,6,0,0\r\r\nC12272,65,+,5,5,1\r\r\nC14220,96,+,6,0,0\r\r\n"
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input_oner.csv > /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,38,+,15,0,0\r\r\nC10295,51,+,21,0,0\r\r\nC10295,142,+,9,0,0\r\r\nC10845,110,+,6,0,0\r\r\nC10845,145,+,6,0,0\r\r\nC11248,71,+,5,0,0\r\r\nC11870,109,+,6,0,0\r\r\nC12272,65,+,5,5,1\r\r\nC14220,96,+,6,0,0\r\r\nC14450,60,+,6,2,0.333333333333333\r\r\n"
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": "!wc /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": " 182475 182475 5410598 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv\r\n"
}
],
"prompt_number": 17
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": "T3D3"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,freqC\r\r\nC10295,38,+,14,0\r\r\nC10295,51,+,21,0\r\r\nC10295,142,+,7,0\r\r\nC10845,110,+,9,0\r\r\nC10845,145,+,6,0\r\r\nC11870,109,+,8,0\r\r\nC12052,112,+,7,0\r\r\nC12570,118,+,5,0\r\r\nC13546,84,+,5,0\r\r\n"
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,38,+,14,0\r\r\nC10295,51,+,21,0\r\r\nC10295,142,+,7,0\r\r\nC10845,110,+,9,0\r\r\nC10845,145,+,6,0\r\r\nC11870,109,+,8,0\r\r\nC12052,112,+,7,0\r\r\nC12570,118,+,5,0\r\r\nC13546,84,+,5,0\r\r\nC13878,147,+,5,0\r\r\n"
}
],
"prompt_number": 14
},
{
"cell_type": "markdown",
"metadata": {},
"source": ""
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "T3D5"
},
{
"cell_type": "raw",
"metadata": {},
"source": "python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGo_lar_T3D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit_input.csv"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,C_count,freqC\r\r\nC10295,38,+,15,0,0\r\r\nC10295,51,+,18,0,0\r\r\nC10295,142,+,9,0,0\r\r\nC14220,96,+,5,0,0\r\r\nC14450,60,+,5,0,0\r\r\nC14796,69,+,12,10,0.833333333333333\r\r\nC15066,82,+,7,0,0\r\r\nC15066,89,+,5,0,0\r\r\nC15066,193,+,6,0,0\r\r\n"
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,38,+,15,0,0\r\r\nC10295,51,+,18,0,0\r\r\nC10295,142,+,9,0,0\r\r\nC14220,96,+,5,0,0\r\r\nC14450,60,+,5,0,0\r\r\nC14796,69,+,12,10,0.833333333333333\r\r\nC15066,82,+,7,0,0\r\r\nC15066,89,+,5,0,0\r\r\nC15066,193,+,6,0,0\r\r\nC15066,197,+,6,0,0\r\r\n"
}
],
"prompt_number": 11
},
{
"cell_type": "markdown",
"metadata": {},
"source": "FILE \n/Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit.csv"
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "M1"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit_input.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "chr,start,strand,CT_count,C_count,freqC\r\r\nC10295,51,+,5,0,0\r\r\nC11141,58,+,5,0,0\r\r\nC11141,73,+,5,0,0\r\r\nC11141,78,+,5,0,0\r\r\nC11848,108,+,6,0,0\r\r\nC12768,103,+,6,1,0.166666666666667\r\r\nC13766,126,+,9,0,0\r\r\nC13766,145,+,8,0,0\r\r\nC14220,96,+,7,0,0\r\r\n"
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M1_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_M1_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,51,+,5,0,0\r\r\nC11141,58,+,5,0,0\r\r\nC11141,73,+,5,0,0\r\r\nC11141,78,+,5,0,0\r\r\nC11848,108,+,6,0,0\r\r\nC12768,103,+,6,1,0.166666666666667\r\r\nC13766,126,+,9,0,0\r\r\nC13766,145,+,8,0,0\r\r\nC14220,96,+,7,0,0\r\r\nC14220,143,+,6,0,0\r\r\n"
}
],
"prompt_number": 7
},
{
"cell_type": "markdown",
"metadata": {},
"source": "FILE \n/Volumes/web/cnidarian/BiGo_lar_M1_methylkit.csv"
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "T1D3"
},
{
"cell_type": "raw",
"metadata": {},
"source": "python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGo_lar_T1D3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit_input.csv"
},
{
"cell_type": "raw",
"metadata": {},
"source": "tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit.csv"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C10295,38,+,7,0,0\r\r\nC10295,51,+,10,0,0\r\r\nC12960,123,+,7,0,0\r\r\nC13766,126,+,6,0,0\r\r\nC13766,145,+,8,0,0\r\r\nC13874,46,+,5,0,0\r\r\nC14796,69,+,11,8,0.727272727272727\r\r\nC14944,199,+,6,0,0\r\r\nC15352,208,+,5,0,0\r\r\nC15814,143,+,6,0,0\r\r\n"
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": "FILE \n/Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit.csv"
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "T1D5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "\n`python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGo_lar_T1D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit_input.csv`"
},
{
"cell_type": "raw",
"metadata": {},
"source": "tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit.csv"
},
{
"cell_type": "code",
"collapsed": false,
"input": "!head /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit.csv",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "C13766,145,+,5,0,0\r\r\nC16318,32,+,7,0,0\r\r\nC16318,177,+,11,0,0\r\r\nC16318,196,+,7,0,0\r\r\nC16318,203,+,7,0,0\r\r\nC16318,206,+,7,1,0.142857142857143\r\r\nC16318,302,+,16,0,0\r\r\nC17734,242,+,5,2,0.4\r\r\nC18932,429,+,6,0,0\r\r\nC19344,310,+,6,0,0\r\r\n"
}
],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": "FILE \n/Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit.csv"
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}