[BioPython] Blast Parser

Emmanuel TALLA etalla at pasteur.fr
Thu Jun 12 13:06:27 EDT 2003


Hi everyone, 

I'm trying to parse a blast file with the NCBIStandalone module and I get
the following error messages. The Blast file was generated with Blast 2.2.5
or 2.2.6. What is wrong with the blast parsing?

Thanks

Emmanuel

###### Error messages
File "./test_parsing_blast.py", line 56, in ?
    print BlastParser2CrossHyb()
  File "./test_parsing_blast.py", line 16, in BlastParser2CrossHyb
    b_record = b_iterator.next()
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 1353, in next
    return self._parser.parse(File.StringHandle(data))
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 515, in parse
    self._scanner.feed(handle, self._consumer)
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 84, in feed
    self._scan_rounds(uhandle, consumer)
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 140, in _scan_rounds
    self._scan_alignments(uhandle, consumer)
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 261, in _scan_alignments
    self._scan_masterslave_alignment(uhandle, consumer)
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 364, in _scan_masterslave_alignment
    consumer.multalign(line)
  File "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
line 769, in multalign
    name = string.rstrip(line[:self._name_length])
TypeError: sequence index must be integer


############# The parser script ##################
#! /local/bin/python
from string import *
from Bio.Blast import NCBIStandalone


def _longest_match_run(match_line):
    """Return the length of the longest whitespace-free run in match_line.

    Returns 0 when the line contains no non-blank characters.
    """
    runs = match_line.split()
    if not runs:
        return 0
    return max([len(run) for run in runs])


def BlastParser2CrossHyb():
    """Parse "YBR301w.dna.bln" and return the HSPs that pass the filters.

    Every HSP of every alignment in every record is examined.  An HSP is
    kept when either
      * its alignment length (second element of hsp.identities) is at least
        25 and its percent identity is at least 40, or
      * the longest uninterrupted run of characters in hsp.match is at least
        20 characters long.

    Returns:
        A list with one entry per retained HSP, each entry being the list
        [query_start, query_end, hit.title[1:], subject_start, subject_end,
         percent_identity, tupleStrand[1]].

    Raises:
        IOError: if the BLAST output file cannot be opened.
        Any exception raised by the NCBIStandalone parser propagates to the
        caller; the file is closed first.
    """
    blast_file = "YBR301w.dna.bln"

    id_threshold = 40   # minimum percent identity
    ml_threshold = 25   # minimum alignment length
    lc_threshold = 20   # minimum length of the longest run in the match line
    cross_hybs = []

    blast_out = open(blast_file, 'r')
    try:
        b_parser = NCBIStandalone.BlastParser()
        b_iterator = NCBIStandalone.Iterator(blast_out, b_parser)

        while 1:
            b_record = b_iterator.next()
            # The loop relies on next() returning None once the input is
            # exhausted.
            if b_record is None:
                break

            for hit in b_record.alignments:
                for hsp in hit.hsps:
                    # hsp.identities is a tuple; element 0 is used as the
                    # identity count and element 1 as the alignment length.
                    identities = hsp.identities
                    match_length = identities[1]
                    percent_id = 100 * float(identities[0]) / match_length

                    # The original sorted the tokens and took the greatest
                    # one, which is the longest only when the match line is
                    # made of a single repeated character; take the maximum
                    # length explicitly and tolerate a blank match line.
                    longest_run = _longest_match_run(hsp.match)

                    passes_identity = (match_length >= ml_threshold
                                       and percent_id >= id_threshold)
                    if not (passes_identity or longest_run >= lc_threshold):
                        continue

                    # hsp.gaps is a tuple; element 0 may be None, which is
                    # treated as "no gaps".
                    gap = hsp.gaps[0]
                    if gap is None:
                        gap = 0

                    query_start = hsp.query_start
                    query_end = hsp.query_start + match_length - 1 - gap

                    strand = hsp.strand
                    if strand[1] == 'Minus':
                        # NOTE(review): original author's open question --
                        # should gaps on the subject be accounted for here?
                        subject_start = hsp.sbjct_start - match_length + 1
                        subject_end = hsp.sbjct_start
                    else:
                        subject_start = hsp.sbjct_start
                        # Does not take gaps into account.
                        subject_end = hsp.sbjct_start + match_length - 1

                    cross_hybs.append([query_start, query_end, hit.title[1:],
                                       subject_start, subject_end, percent_id,
                                       strand[1]])
    finally:
        # Close the handle even when the parser raises.
        blast_out.close()

    return cross_hybs


# Run the parser and print the list of retained HSP entries.
print(BlastParser2CrossHyb())

############
**************************************
Emmanuel TALLA, Ph.D 
Institut Pasteur
Genetique Moleculaire des levures
25, rue du dr Roux
75724 Paris cedex 15
France
Tel: +33(0)1.40.61.30.59
Fax: +33(0)1.40.61.34.56
****************************************


More information about the BioPython mailing list