[BioPython] Blast Parser
Emmanuel TALLA
etalla at pasteur.fr
Thu Jun 12 13:06:27 EDT 2003
Hi everyone,
I'm trying to parse a BLAST output file with the NCBIStandalone module and I get
the following error messages. The BLAST file was generated with BLAST 2.2.5 or
2.2.6. What is wrong with the BLAST parsing?
Thanks
Emmanuel
###### Error messages
File "./test_parsing_blast.py", line 56, in ?
print BlastParser2CrossHyb()
File "./test_parsing_blast.py", line 16, in BlastParser2CrossHyb
b_record = b_iterator.next()
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 1353, in next
return self._parser.parse(File.StringHandle(data))
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 515, in parse
self._scanner.feed(handle, self._consumer)
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 84, in feed
self._scan_rounds(uhandle, consumer)
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 140, in _scan_rounds
self._scan_alignments(uhandle, consumer)
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 261, in _scan_alignments
self._scan_masterslave_alignment(uhandle, consumer)
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 364, in _scan_masterslave_alignment
consumer.multalign(line)
File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 769, in multalign
name = string.rstrip(line[:self._name_length])
TypeError: sequence index must be integer
############# The parser script ##################
#! /local/bin/python
from string import *
from Bio.Blast import NCBIStandalone
def BlastParser2CrossHyb():
    """Collect HSPs from a BLAST report that pass the cross-hyb thresholds.

    Parses the hard-coded report "YBR301w.dna.bln" with
    NCBIStandalone.BlastParser / NCBIStandalone.Iterator and keeps every HSP
    for which either
      * the alignment length (second item of hsp.identities) is at least
        ML_threshold AND the percent identity is at least ID_threshold, or
      * the longest uninterrupted run of non-blank characters in hsp.match
        is at least LC_threshold columns long.

    Returns:
        A list with one 7-item list per retained HSP:
        [QueryStart, QueryEnd, hit.title minus its first character,
         SubjectStart, SubjectEnd, percent identity, subject strand].

    The report file is always closed, even when the parser raises.
    """
    BlastFile = "YBR301w.dna.bln"
    ID_threshold = 40
    ML_threshold = 25
    LC_threshold = 20
    ListeCH = []

    blast_out = open(BlastFile, 'r')
    # try/finally (rather than `with`) keeps this usable on the old Python
    # shown in the traceback while still guaranteeing the handle is released
    # when the parser fails part-way through the report.
    try:
        b_parser = NCBIStandalone.BlastParser()
        b_iterator = NCBIStandalone.Iterator(blast_out, b_parser)
        while 1:
            b_record = b_iterator.next()
            if b_record is None:
                # The iterator signals the end of the report with None.
                break
            for hit in b_record.alignments:
                for hsp in hit.hsps:
                    tupleId = hsp.identities  # returns a tuple
                    Id = float(tupleId[0])
                    ML = tupleId[1]
                    ID = 100 * Id / ML

                    # Longest contiguous stretch of match characters.
                    # Compare by length explicitly: a lexicographic sort only
                    # finds the longest run when every run is made of the same
                    # character, and an all-blank match line has no runs.
                    Lmatch = hsp.match.split()
                    if Lmatch:
                        LC = max([len(run) for run in Lmatch])
                    else:
                        LC = 0

                    if (ML >= ML_threshold and ID >= ID_threshold) or \
                            (LC >= LC_threshold):
                        tupleGap = hsp.gaps  # returns a tuple
                        gap = tupleGap[0]
                        if gap is None:
                            gap = 0
                        QueryStart = hsp.query_start
                        QueryEnd = hsp.query_start + ML - 1 - gap
                        tupleStrand = hsp.strand
                        if tupleStrand[1] == 'Minus':
                            # Open question: what about gaps on the subject?
                            SubjectStart = hsp.sbjct_start - ML + 1
                            SubjectEnd = hsp.sbjct_start
                        else:
                            SubjectStart = hsp.sbjct_start
                            # Does not take the gaps into account.
                            SubjectEnd = hsp.sbjct_start + ML - 1
                        key = [QueryStart, QueryEnd, hit.title[1:],
                               SubjectStart, SubjectEnd, ID, tupleStrand[1]]
                        ListeCH.append(key)
    finally:
        blast_out.close()
    return ListeCH
# Run the parser and dump the list of retained HSP entries to stdout.
cross_hyb_hits = BlastParser2CrossHyb()
print(cross_hyb_hits)
############
**************************************
Emmanuel TALLA, Ph.D
Institut Pasteur
Genetique Moleculaire des levures
25, rue du dr Roux
75724 Paris cedex 15
France
Tel: +33(0)1.40.61.30.59
Fax: +33(0)1.40.61.34.56
****************************************
More information about the BioPython
mailing list