[BioPython] Blast Parser
Jeffrey Chang
jchang at jeffchang.com
Thu Jun 12 09:27:16 EDT 2003
Try using the latest version of NCBIStandalone from cvs.biopython.org.
If it still doesn't work, please email me the blast run that is causing
the problem.
Jeff
On Thursday, June 12, 2003, at 04:06 AM, Emmanuel TALLA wrote:
> Hi everyone,
>
> I'm trying to parse a blast file with the NCBIStandalone module and I get
> the following error messages. The Blast file was generated with Blast 2.2.5
> or 2.2.6. What is wrong with the blast parsing?
>
> Thanks
>
> Emmanuel
>
> ###### Error messages
> File "./test_parsing_blast.py", line 56, in ?
> print BlastParser2CrossHyb()
> File "./test_parsing_blast.py", line 16, in BlastParser2CrossHyb
> b_record = b_iterator.next()
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 1353, in next
> return self._parser.parse(File.StringHandle(data))
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 515, in parse
> self._scanner.feed(handle, self._consumer)
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 84, in feed
> self._scan_rounds(uhandle, consumer)
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 140, in _scan_rounds
> self._scan_alignments(uhandle, consumer)
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 261, in _scan_alignments
> self._scan_masterslave_alignment(uhandle, consumer)
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 364, in _scan_masterslave_alignment
> consumer.multalign(line)
> File
> "/local/lib/python2.2/site-packages/Bio/Blast/NCBIStandalone.py",
> line 769, in multalign
> name = string.rstrip(line[:self._name_length])
> TypeError: sequence index must be integer
>
>
> ############# The parser script ##################
> #! /local/bin/python
> from string import *
> from Bio.Blast import NCBIStandalone
>
>
> def BlastParser2CrossHyb():
> BlastFile = "YBR301w.dna.bln"
> blast_out = open(BlastFile,'r')
> b_parser = NCBIStandalone.BlastParser()
> b_iterator = NCBIStandalone.Iterator(blast_out, b_parser)
>
> ID_threshold = 40
> ML_threshold = 25
> LC_threshold = 20
> ListeCH=[]
>
> while 1:
> b_record = b_iterator.next()
>
> if b_record is None:
> break
>
> for hit in b_record.alignments:
> for hsp in hit.hsps:
> tupleId = hsp.identities#renvoi un tuple
> Id = float(tupleId[0])
> ML= tupleId[1]
> ID= 100*Id/ML
> matches = hsp.match
> Lmatch = matches.split()
> Lmatch.sort()
> Lmatch.reverse()
> LC = len(Lmatch[0])
> if (ML >= ML_threshold and ID >= ID_threshold) or (LC >= LC_threshold):
> tupleGap = hsp.gaps # renvoi un tuple
> gap = tupleGap[0]
> if gap == None:
> gap = 0
>
> QueryStart = hsp.query_start
> QueryEnd = hsp.query_start + ML - 1 - gap
>
> tupleStrand = hsp.strand
> if tupleStrand[1] == 'Minus':
> SubjectStart = hsp.sbjct_start - ML + 1 #question gap sur le subject???
> SubjectEnd = hsp.sbjct_start
> else:
> SubjectStart = hsp.sbjct_start
> SubjectEnd = hsp.sbjct_start + ML - 1 #ne tient pas compte du gap
>
> key = [QueryStart, QueryEnd, hit.title[1:],
> SubjectStart, SubjectEnd, ID, tupleStrand[1]]
> ListeCH.append(key)
>
> blast_out.close()
> return ListeCH
>
>
> print BlastParser2CrossHyb()
>
> ############
> **************************************
> Emmanuel TALLA, Ph.D
> Institut Pasteur
> Genetique Moleculaire des levures
> 25, rue du dr Roux
> 75724 Paris cedex 15
> France
> Tel: +33(0)1.40.61.30.59
> Fax: +33(0)1.40.61.34.56
> ****************************************
> _______________________________________________
> BioPython mailing list - BioPython at biopython.org
> http://biopython.org/mailman/listinfo/biopython
More information about the BioPython
mailing list