[Bioperl-l] New to Bio::Tools::Run::RemoteBlast
Smithies, Russell
Russell.Smithies at agresearch.co.nz
Mon Sep 21 19:04:54 EDT 2009
If you want to "manually" use Perl and QBlast, here's some example code.
I don't remember where it came from but it works well :-)
**Ignore the UserAgent stuff, our firewall is fairly well tied down.
--Russell
============================
#!perl -w
$| = 1;
use LWP::UserAgent;
use HTTP::Request::Common 'POST';
$ua = LWP::UserAgent->new;
push @{ $ua->requests_redirectable }, 'POST'; #LWP doesn't redirect by default
$ua->agent('Mozilla/5.0');
#$ua->proxy( [ 'http', 'ftp' ] => 'http://username:password@your.proxy.if.required:8080' );
my $verbose = 1;
my $seq = getSequence();
my ( $blast, $taxonomy ) = queryQBlast($seq);
$verbose && print "saving result\n";
saveToFile( $blast, "blast.txt" );
saveToFile( $taxonomy, "taxonomy.html" );
$verbose && print "Done.\n";
sub queryQBlast {
my ($seq) = @_;
$seq =~ s/[\d\n\W]//g;
my $sleepTime = 0;
my $sleepTimeIncrement = 5;
my $totalSleepTime = 0;
my $maxSleepTime = 600; # 10 min
my ( $rid, $rtoe ) = startQBlast($seq);
my ( $blast, $taxonomy );
while ( !$blast ) {
$verbose && printf "wait %3d seconds\n", $sleepTime;
sleep $sleepTime;
( $blast, $taxonomy ) = retrieveQBlastResult($rid);
$sleepTime += $sleepTimeIncrement unless ( $sleepTime > 100 );
$totalSleepTime += $sleepTimeIncrement;
last if ( $totalSleepTime > $maxSleepTime );
}
return ( $blast, $taxonomy );
}
sub startQBlast {
my ($sequence) = @_;
my ( $expect, $wsize, $filter, $mega );
my $hitList = 100;
if ( length($sequence) <= 20 ) {
$expect = 1000;
$wsize = 7;
$mega = "on";
$filter = "";
}
else {
$expect = 10;
$wsize = 28;
$mega = "on";
$filter = "L"; # Low complexity
}
my $qblastURL = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?";
my $url = $qblastURL . "QUERY=$sequence";
$url .=
"&DATABASE=nr&HITLIST_SIZE=${hitList}&FILTER=${filter}&EXPECT=${expect}&FORMAT_TYPE=Text";
$url .=
"&PROGRAM=blastn&CLIENT=web&SERVICE=plain&NCBI_GI=on&PAGE=Nucleotides";
$url .= "&SHOW_OVERVIEW=&WORD_SIZE=${wsize}&MEGABLAST=${mega}&CMD=Put";
my $req = HTTP::Request->new( GET => $url );
my $content = $ua->request($req)->content;
$content =~ s/\s+/ /g;
my ( $rid, $rtoe ) = $content =~
/QBlastInfoBegin RID = ([\d\-\.\w]+) RTOE = (\d+) QBlastInfoEnd/;
if ( !$rid ) { print qq{\nERROR missing RID:\n}; exit; }
$verbose && print "RID $rid\n";
return ( $rid, $rtoe );
}
sub retrieveQBlastResult {
my ($rid) = @_;
my $qblastURL = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?";
my $url = $qblastURL
. "RID=$rid&CMD=Get&SHOW_OVERVIEW=&SHOW_LINKOUT=&FORMAT_TYPE=Text";
my ( $blast, $taxonomy, $req );
$req = HTTP::Request->new( GET => $url );
$blast = $ua->request($req)->content;
if ( $blast =~ /\s+Status=WAITING/ ) {
$blast = "";
}
elsif ( $blast =~ /\s+Status=UNKNOWN/ ) {
print "Error in processing\nRID $rid\n";
exit;
}
else {
$verbose && print "got blast result\n";
$verbose && print "retrieving taxonomy data\n";
$url = $qblastURL . "CMD=Get&RID=$rid&FORMAT_OBJECT=TaxBlast&NCBI_GI=on";
$req = HTTP::Request->new( GET => $url );
$taxonomy = $ua->request($req)->content;
$taxonomy = "" if ( $taxonomy =~ /No valid taxids found in the alignment/ );
}
return ( $blast, $taxonomy );
}
sub saveToFile {
my ( $data, $file ) = @_;
local (*OUT);
open( OUT, ">$file" );
print OUT $data;
close OUT;
}
sub getSequence {
return qq{
AAAGGATTTATTGACGATGCGAACTACTCCGTTGGCCTGTTGGATGAAGGAACAAA
CCTTGGAAATGTTATTGATAACTATGTTTATGAACATACCCTGACAGGAAAAAATGCAT
TTTTTGTGGGGGATCTTGGGAAGATCGTGAAGAAGCACAGTCAGTGGCAGACCGTGGTG
GCTCAGATAAAGCCGTTTTACACGGTGAAGTGCAACTCCACTCCAGCCGTGCTTGAGAT
CTTGGCAGCTCTTGGAACTGGGTTTGCTTGTTCCAGCAAAAATGAAATGGCTTTAGTGC
AAGAATTGGGTGTATCTCCAGAAAACATCATTTTCACAAGTCCTTGTAAGCAAGTGTCT
CAGATAAAGTATGCAGCAAAAGTTGGAGTAAATATTATGACATGTGACAATGAGATTGA
ATTAAAGAAAATTGCAAGGAATCACCCAAATGCCAAGGTCTTACTACATATTGCAACAG
AAGATAATATTGGAGGTGAAGATGGTAACATGAAGTTTGGCACTACACTGAAGAATTGT
AGGCATCTTTTGGAATGTGCCAAGGAACTTGATGTCCAAATAATTGGGGTTAAATTTCA
TGTTTCAAGTGCTTGCAAAGAATATCAAGTATATGTACATGCCCTGTCTGATGCTCGAT
GTGTGTTTGACATGGCTGGAGAGTTTGGCTTTACAATGAACATGTTAGACATCGGTGGA
GGCTTCACAGGAACTGAAATTCAGTTGGAAGAGGTTAATCATGTTATCAGTCCTCTGTT
GGATATTTACTTCCCTGAAGGATCTGGCATTCAGATAATTTCAGAACCTGGAAGCTACT
ATGTATCTTCTGCGTTTACACTTGCAGTCAATATTATTGCTAAGAAAGTTGTTGAAAAT
GATAAATTTTCCTCTGGAGTAGAAAAAAATGGGAGTGATGAGCCAGCCTTCGTGTATTA
CATGAATGATGGTGTTTATGGTTCTTTTGCGAGTAAGCTTTCTGAGGACTTAAATACCA
TTCCAGAGGTTCACAAGAAATACAAGGAAGATGAGCCTCTGTTTACAAGCAGCCTTTGG
GGTCCATCCTGTGATGAGCTTGATCAAATTGTGGAAAGCTGTCTTCTTCCTGAGCTGAA
TGTGGGAGATTGGCTTATCTTTGATAACATGGGAGCAGATTCTTTCCACGAACCATCTG
CTTTTAATGATTTTCAGAGGCCAGCTATTTATTTCATGATGTCATTCAGTGATTGGTAT
GAGATGCAAGATGCTGGAATTACTTCAGATGCAATGATGAAAAACTTCTTCTTTGCACC
CTCTTGTATTCAGCTGAGCCAAGAAGACAGCTTTTCCACTGAAGCT};
}
================================
> -----Original Message-----
> From: bioperl-l-bounces at lists.open-bio.org [mailto:bioperl-l-
> bounces at lists.open-bio.org] On Behalf Of Smithies, Russell
> Sent: Tuesday, 22 September 2009 10:48 a.m.
> To: 'bill at genenformics.com'
> Cc: 'bioperl-l at lists.open-bio.org'; 'armendarez77 at hotmail.com'
> Subject: Re: [Bioperl-l] New to Bio::Tools::Run::RemoteBlast
>
> That doesn't work with remote databases though.
> B:T:R:RemoteBlast uses the QBlast API (I think) so you're limited to the
> prebuilt databases NCBI offers.
> http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html
>
> Another thing to try is space-seperating your db list - I know it works with
> local blasts.
> You could also bypass RemoteBlast and do it yourself by POSTing via URL.
>
> This seems to work with multiple databases but you'd need to experiment:
>
> http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=257700677&DATABASE=%22Microb
> ial/100226%20Microbial/101510%20Microbial/103690%22&HITLIST_SIZE=10&FILTER=L&E
> XPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=plain&NCBI_GI=on&P
> AGE=Nucleotides&CMD=Put
>
>
> --Russell
>
>
> > -----Original Message-----
> > From: bill at genenformics.com [mailto:bill at genenformics.com]
> > Sent: Tuesday, 22 September 2009 10:21 a.m.
> > To: Smithies, Russell
> > Cc: 'armendarez77 at hotmail.com'; 'bioperl-l at lists.open-bio.org'
> > Subject: Re: [Bioperl-l] New to Bio::Tools::Run::RemoteBlast
> >
> > BLAST DBs can be concatenated into a single target (.nal or .pal) file.
> >
> > Check this out:
> >
> > http://www.ncbi.nlm.nih.gov/Web/Newsltr/Winter00/blastlab.html
> >
> > Bill
> >
> > > You may need to setup blast locally (not a big job) as I don't think you
> > > can blast against multiple databases with B:T:R:RemoteBlast.
> > > Or you could do it manually on NCBI's site where you can filter results by
> > > entrez query (eg. 1239[taxid] for fermicutes)
> > > http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml#entrez_query
> > >
> > > --Russell
> > >
> > >
> > >> -----Original Message-----
> > >> From: bioperl-l-bounces at lists.open-bio.org [mailto:bioperl-l-
> > >> bounces at lists.open-bio.org] On Behalf Of armendarez77 at hotmail.com
> > >> Sent: Tuesday, 22 September 2009 9:01 a.m.
> > >> To: bioperl-l at lists.open-bio.org
> > >> Subject: [Bioperl-l] New to Bio::Tools::Run::RemoteBlast
> > >>
> > >>
> > >>
> > >>
> > >>
> > >>
> > >>
> > >> Hello,
> > >>
> > >> Is there a function to blast one query sequence against multiple blast
> > >> databases? For example, I want to blast a sequence against all
> > >> Microbial
> > >> Genomes. Currently, I can do it by placing multiple Microbial databases
> > >> (eg.
> > >> Microbial/100226, Microbial/101510, etc) into an array and iterate
> > >> through
> > >> them using a foreach loop. Each individual database is placed in the
> > >> '-data'
> > >> parameter and the blast is performed.
> > >>
> > >> Example Code:
> > >>
> > >> use strict;
> > >> use Bio::Tools::Run::RemoteBlast;
> > >>
> > >> my @microbDbs = qw(Microbial/100226 Microbial/101510 Microbial/103690
> > >> Microbial/1063);
> > >> my $e_val= '1e-3';
> > >>
> > >> foreach my $db(@microbDbs){
> > >> my @params = ( '-prog' => $prog,
> > >> '-data' => $db,
> > >> '-expect' => $e_val,
> > >> '-readmethod' => 'xml' );
> > >>
> > >> my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
> > >> my $v = 1;
> > >> my $str = Bio::SeqIO->new(-file=>'test.fa' , '-format' => 'fasta' );
> > >> while (my $input = $str->next_seq()){
> > >> my $r = $factory->submit_blast($input);
> > >>
> > >> #Code continues...
> > >>
> > >> }
> > >>
> > >> Is there a more efficient way to accomplish this?
> > >>
> > >> If this topic has been discussed please point the way.
> > >>
> > >> Thank you,
> > >>
> > >> Veronica
> > >>
> > >>
> > >> _________________________________________________________________
> > >> Microsoft brings you a new way to search the web. Try Bing(tm) now
> > >>
> >
> http://www.bing.com?form=MFEHPG&publ=WLHMTAG&crea=TEXT_MFEHPG_Core_tagline_try
> > >> bing_1x1
> > >> _______________________________________________
> > >> Bioperl-l mailing list
> > >> Bioperl-l at lists.open-bio.org
> > >> http://lists.open-bio.org/mailman/listinfo/bioperl-l
> > > =======================================================================
> > > Attention: The information contained in this message and/or attachments
> > > from AgResearch Limited is intended only for the persons or entities
> > > to which it is addressed and may contain confidential and/or privileged
> > > material. Any review, retransmission, dissemination or other use of, or
> > > taking of any action in reliance upon, this information by persons or
> > > entities other than the intended recipients is prohibited by AgResearch
> > > Limited. If you have received this message in error, please notify the
> > > sender immediately.
> > > =======================================================================
> > >
> > > _______________________________________________
> > > Bioperl-l mailing list
> > > Bioperl-l at lists.open-bio.org
> > > http://lists.open-bio.org/mailman/listinfo/bioperl-l
> > >
> >
>
>
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/bioperl-l
More information about the Bioperl-l
mailing list