[Biopython] NCBIXML: hit start and end
Mic
mictadlo at gmail.com
Wed Apr 24 01:55:06 EDT 2013
Hi,
I have tried to rewrite the Perl code to Biopython
sub retrieve {
my $blast_report = $options->{'blast'};
my $max_hits = $options->{'maxhits'};
my $searchio = new Bio::SearchIO(
-format => 'blast',
-file => $blast_report
);
while ( my $result = $searchio->next_result ) {
my $query_name = $result->query_name();
my $count_unirefs = 0;
my %hit_names_count = ();
while ( my $hit = $result->next_hit ) {
$count_unirefs++;
my $count_hsp = 0;
my @plushsps = ();
my @minhsps = ();
while ( my $hsp = $hit->next_hsp ) {
$count_hsp++;
my $query_start = $hsp->start('query');
my $query_end = $hsp->end('query');
my $hit_start = $hsp->start('hit');
my $hit_end = $hsp->end('hit');
my $strand = $hsp->strand();
my $hit_desc = $hit->description();
my @hsp_data = ($query_start, $query_end, $hit_start,
$hit_end, $hit_desc);
}
}
}
}
Biopython code:
---------------
from Bio import SeqIO
from Bio.Blast import NCBIXML
def retrieve_hits_data():
max_hits = 5 # Change to args
with open("test/x.xml") as bf:
blast_records = NCBIXML.parse(bf)
for blast_record in blast_records:
print blast_record.query
print
for alignment in blast_record.alignments:
print 'sequence:', alignment.title
print alignment.hit_id
print alignment.hit_def
print 'length:', alignment.length
for hsp in alignment.hsps:
print "HSPs"
print "----"
print 'e value:', hsp.expect
#print hsp.query
#print hsp.match
#print hsp.sbjct
print hsp.score
print hsp.bits
print hsp.num_alignments
print hsp.identities
print hsp.positives
print hsp.gaps
print hsp.align_length
print hsp.strand
print hsp.frame
print hsp.query_start
print hsp.query_end
#print hsp.hit_start
#print hsp.hit_end
print hsp.sbjct_start
print hsp.sbjct_end
retrieve_hits_data()
Output from Biopython code:
XA10_v3.0-snap.1
XA10_v3.0-snap.2
XA10_v3.0-snap.3
XA10_v3.0-snap.4
sequence: UniRef90_Q9FX16 F12G12.10 protein n=1 Tax=Arabidopsis thaliana
RepID=Q9FX16_ARATH
UniRef90_Q9FX16
F12G12.10 protein n=1 Tax=Arabidopsis thaliana RepID=Q9FX16_ARATH
length: 308
HSPs
----
e value: 8.30308e-88
709.0
277.715
None
146
192
10
285
(None, None)
(0, 0)
10
290
8
286
How do I get hsp->start('hit') and hsp->end('hit') from the bioperl code in
Biopython?
Why does blast_record.query appears immediately in sequence and not after
the other two for loops has finished?
Thank you in advance.
Mic
More information about the Biopython
mailing list