Posted by Bloguero_Connor on Wed 2 Apr 12:32
download | new post
- # Code by Sebastian Bassi
- # License: GPL 3.0 (http://www.gnu.org/licenses/gpl-3.0-standalone.html)
- import xml.etree.cElementTree as cET
- f_in='/mnt/hda2/bio/blast-2.2.18/bin/m3x-218.xml'
- i_hits={}
- hits={}
- hsps={}
- for ev,x in cET.iterparse(f_in):
- if 'BlastOutput_version' in x.tag:
- b_version=x.text
- elif 'BlastOutput_program' in x.tag:
- b_prg=x.text
- elif 'BlastOutput_reference' in x.tag:
- b_reference=x.text[12:]
- elif 'BlastOutput_query-def' in x.tag:
- b_query_def=x.text
- elif 'BlastOutput_query-len' in x.tag:
- b_query_len=x.text
- elif 'BlastOutput_db' in x.tag:
- b_db=x.text
- elif 'Parameters_expect' in x.tag:
- p_expect=x.text
- elif 'Parameters_sc-match' in x.tag:
- p_sc_match=x.text
- elif 'Parameters_sc-mismatch' in x.tag:
- p_sc_mismatch=x.text
- elif 'Parameters_gap-open' in x.tag:
- p_gap_open=x.text
- elif 'Parameters_gap-extend' in x.tag:
- p_gap_extend=x.text
- elif 'Parameters_filter' in x.tag:
- p_filter=x.text
- elif 'Iteration_query-def' in x.tag:
- i_query_def=x.text
- elif 'Iteration_iter-num' in x.tag:
- i_iter_num=x.text
- elif 'Iteration_query-ID' in x.tag:
- i_query_id=x.text
- elif 'Iteration_query-len' in x.tag:
- i_query_len=x.text
- elif 'Iteration'==x.tag:
- i_hits[int(i_iter_num)]=(i_query_id, i_query_def,
- i_query_len, hits, stats)
- hits={}
- elif 'Hit_num' in x.tag:
- h_num=x.text
- elif 'Hit_id' in x.tag:
- h_id=x.text
- elif 'Hit_def' in x.tag:
- h_def=x.text
- elif 'Hit_accession' in x.tag:
- h_accession=x.text
- elif 'Hit_len' in x.tag:
- h_len=x.text
- elif 'Hit'==x.tag:
- hits[int(h_num)]=(h_id,h_def,h_accession,h_len,hsps)
- hsps={}
- elif 'Hsp_num' in x.tag:
- hsp_num=x.text
- elif 'Hsp_bit-score' in x.tag:
- hsp_bit_score=x.text
- elif 'Hsp_score' in x.tag:
- hsp_score=x.text
- elif 'Hsp_evalue' in x.tag:
- hsp_evalue=x.text
- elif 'Hsp_query-from' in x.tag:
- hsp_query_from=x.text
- elif 'Hsp_query-to' in x.tag:
- hsp_query_to=x.text
- elif 'Hsp_hit-from' in x.tag:
- hsp_hit_from=x.text
- elif 'Hsp_hit-to' in x.tag:
- hsp_hit_to=x.text
- elif 'Hsp_query-frame' in x.tag:
- hsp_query_frame=x.text
- elif 'Hsp_hit-frame' in x.tag:
- hsp_hit_frame=x.text
- elif 'Hsp_identity' in x.tag:
- hsp_identity=x.text
- elif 'Hsp_positive' in x.tag:
- hsp_positive=x.text
- elif 'Hsp_align-len' in x.tag:
- hsp_align_len=x.text
- elif 'Hsp_qseq' in x.tag:
- hsp_qseq=x.text
- elif 'Hsp_hseq' in x.tag:
- hsp_hseq=x.text
- elif 'Hsp_midline' in x.tag:
- hsp_midline=x.text
- elif 'Hsp'==x.tag:
- hsps[int(hsp_num)]=(hsp_bit_score,hsp_score,hsp_evalue,
- hsp_query_from,hsp_query_to,hsp_hit_from,hsp_hit_to,
- hsp_query_frame,hsp_hit_frame,hsp_identity,
- hsp_positive,hsp_align_len,hsp_qseq,
- hsp_hseq,hsp_midline)
- elif 'Statistics_db-num' in x.tag:
- s_db_num=x.text
- elif 'Statistics_db-len' in x.tag:
- s_db_len=x.text
- elif 'Statistics_hsp-len' in x.tag:
- s_hsp_len=x.text
- elif 'Statistics_eff-space' in x.tag:
- s_eff_space=x.text
- elif 'Statistics_kappa' in x.tag:
- s_kappa=x.text
- elif 'Statistics_lambda' in x.tag:
- s_lambda=x.text
- elif 'Statistics_entropy' in x.tag:
- s_entropy=x.text
- elif 'Statistics'==x.tag:
- stats=(s_db_num,s_db_len,s_hsp_len,s_eff_space,
- s_kappa,s_lambda,s_entropy)
- print '''<HTML>
- <TITLE>BLAST Search Results</TITLE>
- <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
- <PRE>'''
- print '<b>'+b_version+'</b>\n'
- print '<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9254694&dopt=Citation">Reference</a>:</b>'+\
- b_reference.replace('~',' ')
- for x in xrange(1,len(i_hits)+1):
- print '<b>Query=</b> %s' %(i_hits[x][1])
- print ' (%s letters)' %(i_hits[x][2])
- print '<b>Database:</b> %s' %(b_db)
- print ' %s sequences; %s total letters' %(i_hits[x][4][0],i_hits[x][4][1])
- print '''Searching..................................................done
- <PRE>
- Score E
- Sequences producing significant alignments: (bits) Value
- '''
- for y in xrange(1,len(i_hits[x][3])+1):
- k=i_hits[x][3][y][0]
- m=k.index('gi|')+3
- gi=k[m:k[m:].index('|')+3]
- desc=i_hits[x][3][y][1]
- bs=i_hits[x][3][y][4][1][0]
- sc=i_hits[x][3][y][4][1][2]
- print '<a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?\
- cmd=Retrieve&db=Nucleotide&list_uids=%s&dopt=GenBank" >\
- %s</a> %s <a href = #%s> %s</a> %s' \
- %(gi,k.replace('gi|'+gi+'|',''),desc[:36]+'...',gi,bs,sc)
- print '</PRE>\n'
- for y in xrange(1,len(i_hits[x][3])+1):
- print '<PRE>\n'
- k=i_hits[x][3][y][0]
- m=k.index('gi|')+3
- gi=k[m:k[m:].index('|')+3]
- desc=i_hits[x][3][y][1]
- print '><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?\
- cmd=Retrieve&db=Nucleotide&list_uids=%s&dopt=GenBank" >\
- %s</a> %s ' \
- %(gi,k.replace('gi|'+gi+'|',''),desc)
- print ' Length = '+i_hits[x][3][y][3]+'\n'
- # Walk over all the hsps
- for z in xrange(1,len(i_hits[x][3][y][4])+1):
- bs=i_hits[x][3][y][4][z][0]
- hsc=i_hits[x][3][y][4][z][1]
- sc=i_hits[x][3][y][4][z][2]
- h_id=i_hits[x][3][y][4][z][10]
- h_pos=i_hits[x][3][y][4][z][11]
- q_frame=i_hits[x][3][y][4][z][7]
- h_frame=i_hits[x][3][y][4][z][8]
- q_from=i_hits[x][3][y][4][z][3]
- q_to=i_hits[x][3][y][4][z][4]
- h_from=i_hits[x][3][y][4][z][5]
- h_to=i_hits[x][3][y][4][z][6]
- qseq=i_hits[x][3][y][4][z][12]
- hseq=i_hits[x][3][y][4][z][13]
- medio=i_hits[x][3][y][4][z][14]
- if q_frame=='1':
- qf='Plus'
- else:
- qf='Minus'
- if h_frame=='1':
- hf='Plus'
- else:
- hf='Minus'
- print 'Score = %s bits (%s), Expect = %s' %(bs,hsc,sc)
- print 'Identities = %s/%s (%.0f%%)' %(h_id,
- h_pos,float(int(h_id))/int(h_pos)*100)
- print 'Strand = %s/%s\n\n' %(qf,hf)
- if h_frame=='1':
- mx=max(len(q_from),len(h_from))
- print 'Query: %s %s %s' %(q_from.ljust(mx),qseq,q_to)
- print ' '+' '*mx+' '+medio
- print 'Sbjct: %s %s %s\n' %(h_from.ljust(mx),hseq,h_to)
- else:
- mx=max(len(q_from),len(h_to))
- print 'Query: %s %s %s' %(q_from.ljust(mx),qseq,q_to)
- print ' '+' '*mx+' '+medio
- print 'Sbjct: %s %s %s\n' %(h_to.ljust(mx),hseq,h_from)
- print '</PRE>'
- print '''<PRE>
- Database: %s
- Number of letters in database: %s
- Number of sequences in database: %s
- Lambda K H
- %.2f %.3f %.2f
- Matrix: %s matrix:%s %s
- Gap Penalties: Existence: %s, Extension: %s
- Number of Sequences: %s
- Length of database: %s
- </PRE>
- </BODY>
- </HTML>''' %(b_version,i_hits[1][4][1],i_hits[1][4][0],
- float(i_hits[1][4][4]),
- float(i_hits[1][4][5]),
- float(i_hits[1][4][6]),b_prg,p_sc_match,
- p_sc_mismatch,p_gap_open,p_gap_extend,
- i_hits[1][4][0],i_hits[1][4][1])
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.