Posted by Bloguero_Connor on Wed 2 Apr 12:32 (modification of post by Bloguero_Connor view diff)
download | new post
- # Code by Sebastian Bassi
- # License: GPL 3.0 (http://www.gnu.org/licenses/gpl-3.0-standalone.html)
- from Bio.Blast import NCBIXML
- f_in='/mnt/hda2/bio/blast-2.2.18/bin/m3x-218.xml'
- fr=NCBIXML.parse(open(f_in)).next()
- print '''<HTML>
- <TITLE>BLAST Search Results</TITLE>
- <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
- <PRE>'''
- print '<b>%s</b>' %(fr.application+' '+fr.version+' '+fr.date)
- print '\n<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9254694&dopt=Citation">Reference</a>:</b>'+\
- fr.reference.replace('~',' ')
- for rec in NCBIXML.parse(open(f_in)):
- print '<b>Query=</b> %s' %(rec.query)
- print ' (%s letters)' %(rec.query_letters)
- print '<b>Database:</b> %s' %(rec.database)
- if fr.num_letters_in_database!=[]:
- #This test is for a bug in Biopython 1.45
- print ' %s sequences; %s total letters' \
- %(fr.num_sequences_in_database,
- fr.num_letters_in_database)
- #except AttributeError:
- else:
- # For Biopython > 1.45
- print ' %s sequences; %s total letters' \
- %(fr.num_sequences_in_database,
- fr._num_letters_in_database)
- print '''Searching..................................................done
- <PRE>
- Score E
- Sequences producing significant alignments: (bits) Value
- '''
- for d in fr.descriptions:
- k=d.accession
- desc=d.title
- bs=d.score
- sc=d.e
- if 'gi|' in k:
- m=k.index('gi|')+3
- gi=k[m:k[m:].index('|')+3]
- print '<a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?\
- cmd=Retrieve&db=Nucleotide&list_uids=%s&dopt=GenBank" >\
- %s</a> %s <a href = #%s> %s</a> %s' \
- %(gi,k.replace('gi|'+gi+'|',''),desc[:36]+'...',gi,bs,sc)
- else:
- print '%s <a href = #%s> %s</a> %s' \
- %(desc[:40]+'...',k,bs,sc)
- print '</PRE>\n'
- for alig in fr.alignments:
- print '<PRE>\n'
- k=alig.hit_id
- desc=alig.hit_def
- if 'gi|' in k:
- m=k.index('gi|')+3
- gi=k[m:k[m:].index('|')+3]
- print '><a name=%s></a><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?\
- cmd=Retrieve&db=Nucleotide&list_uids=%s&dopt=GenBank" >\
- %s</a> %s ' \
- %(gi,gi,k.replace('gi|'+gi+'|',''),desc)
- else:
- print '><a name=%s></a>%s' %(k[k.index('|')+1:],desc)
- print ' Length = %s\n' %(alig.length)
- # Walk over all the hsps
- for hsp in alig.hsps:
- bs=hsp.bits
- hsc=hsp.score
- sc=hsp.expect
- h_id=hsp.identities
- h_pos=hsp.positives
- q_frame=hsp.frame[0]
- h_frame=hsp.frame[1]
- q_from=str(hsp.query_start)
- q_to=str(hsp.query_end)
- h_from=str(hsp.sbjct_start)
- h_to=str(hsp.sbjct_end)
- qseq=hsp.query
- hseq=hsp.sbjct
- medio=hsp.match
- if q_frame==1:
- qf='Plus'
- else:
- qf='Minus'
- if h_frame==1:
- hf='Plus'
- else:
- hf='Minus'
- print 'Score = %s bits (%s), Expect = %s' %(bs,hsc,sc)
- print 'Identities = %s/%s (%.0f%%)' %(h_id,
- h_pos,float(int(h_id))/int(h_pos)*100)
- print 'Strand = %s/%s\n\n' %(qf,hf)
- if h_frame=='1':
- mx=max(len(q_from),len(h_from))
- print 'Query: %s %s %s' %(q_from.ljust(mx),qseq,q_to)
- print ' '+' '*mx+' '+medio
- print 'Sbjct: %s %s %s\n' %(h_from.ljust(mx),hseq,h_to)
- else:
- mx=max(len(q_from),len(h_to))
- print 'Query: %s %s %s' %(q_from.ljust(mx),qseq,q_to)
- print ' '+' '*mx+' '+medio
- print 'Sbjct: %s %s %s\n' %(h_to.ljust(mx),hseq,h_from)
- print '</PRE>'
- if fr.num_letters_in_database!=[]:
- print '''<PRE>
- Database: %s
- Number of letters in database: %s
- Number of sequences in database: %s
- Lambda K H
- %.2f %.3f %.2f
- Matrix: %s matrix:%s %s
- Gap Penalties: Existence: %s, Extension: %s
- Number of Sequences: %s
- Length of database: %s
- </PRE>
- </BODY>
- </HTML>''' %(rec.database,fr.num_letters_in_database,
- fr.num_sequences_in_database,
- fr.ka_params[0],fr.ka_params[1],
- fr.ka_params[2],fr.application,
- fr.sc_match,fr.sc_mismatch,
- fr.gap_penalties[0],fr.gap_penalties[1],
- fr.num_sequences_in_database,
- fr._num_letters_in_database)
- else:
- # Hack for Biopython 1.45 or earlier.
- print '''<PRE>
- Database: %s
- Number of letters in database: %s
- Number of sequences in database: %s
- Lambda K H
- %.2f %.3f %.2f
- Matrix: %s matrix:%s %s
- Gap Penalties: Existence: %s, Extension: %s
- Number of Sequences: %s
- Length of database: %s
- </PRE>
- </BODY>
- </HTML>''' %(rec.database,fr._num_letters_in_database,
- fr.num_sequences_in_database,
- fr.ka_params[0],fr.ka_params[1],
- fr.ka_params[2],fr.application,
- fr.sc_match,fr.sc_mismatch,
- fr.gap_penalties[0],fr.gap_penalties[1],
- fr.num_sequences_in_database,
- fr._num_letters_in_database)
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.