[Bioperl-l] New to Bio::Tools::Run::RemoteBlast

Smithies, Russell Russell.Smithies at agresearch.co.nz
Mon Sep 21 23:04:54 UTC 2009


If you want to "manually" use Perl and QBlast, here's some example code.
I don't remember where it came from but it works well  :-)

**Ignore the UserAgent stuff, our firewall is fairly well tied down.

--Russell

============================

#!perl -w
$| = 1;

use LWP::UserAgent;
use HTTP::Request::Common 'POST';

$ua = LWP::UserAgent->new;
push @{ $ua->requests_redirectable }, 'POST';   #LWP doesn't redirect by default
$ua->agent('Mozilla/5.0');

#$ua->proxy( [ 'http', 'ftp' ] => 'http://username:password@your.proxy.if.required:8080' );

my $verbose = 1;
my $seq     = getSequence();
my ( $blast, $taxonomy ) = queryQBlast($seq);
$verbose && print "saving result\n";
saveToFile( $blast,    "blast.txt" );
saveToFile( $taxonomy, "taxonomy.html" );
$verbose && print "Done.\n";

sub queryQBlast {
  my ($seq) = @_;
  $seq =~ s/[\d\n\W]//g;
  my $sleepTime          = 0;
  my $sleepTimeIncrement = 5;
  my $totalSleepTime     = 0;
  my $maxSleepTime       = 600;    # 10 min
  my ( $rid, $rtoe ) = startQBlast($seq);
  my ( $blast, $taxonomy );

  while ( !$blast ) {
    $verbose && printf "wait %3d seconds\n", $sleepTime;
    sleep $sleepTime;
    ( $blast, $taxonomy ) = retrieveQBlastResult($rid);
    $sleepTime += $sleepTimeIncrement unless ( $sleepTime > 100 );
    $totalSleepTime += $sleepTimeIncrement;
    last if ( $totalSleepTime > $maxSleepTime );
  }
  return ( $blast, $taxonomy );
}

sub startQBlast {
  my ($sequence) = @_;
  my ( $expect, $wsize, $filter, $mega );
  my $hitList = 100;
  if ( length($sequence) <= 20 ) {
    $expect = 1000;
    $wsize  = 7;
    $mega   = "on";
    $filter = "";
  }
  else {
    $expect = 10;
    $wsize  = 28;
    $mega   = "on";
    $filter = "L";    # Low complexity
  }
  my $qblastURL = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?";
  my $url       = $qblastURL . "QUERY=$sequence";
  $url .=
"&DATABASE=nr&HITLIST_SIZE=${hitList}&FILTER=${filter}&EXPECT=${expect}&FORMAT_TYPE=Text";
  $url .=
    "&PROGRAM=blastn&CLIENT=web&SERVICE=plain&NCBI_GI=on&PAGE=Nucleotides";
  $url .= "&SHOW_OVERVIEW=&WORD_SIZE=${wsize}&MEGABLAST=${mega}&CMD=Put";
  my $req = HTTP::Request->new( GET => $url );
  my $content = $ua->request($req)->content;
  $content =~ s/\s+/ /g;
  my ( $rid, $rtoe ) = $content =~
    /QBlastInfoBegin RID = ([\d\-\.\w]+) RTOE = (\d+) QBlastInfoEnd/;
  if ( !$rid ) { print qq{\nERROR missing RID:\n}; exit; }
  $verbose && print "RID $rid\n";
  return ( $rid, $rtoe );
}

sub retrieveQBlastResult {
  my ($rid)     = @_;
  my $qblastURL = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?";
  my $url       = $qblastURL
    . "RID=$rid&CMD=Get&SHOW_OVERVIEW=&SHOW_LINKOUT=&FORMAT_TYPE=Text";
  my ( $blast, $taxonomy, $req );
  $req = HTTP::Request->new( GET => $url );
  $blast = $ua->request($req)->content;
  if ( $blast =~ /\s+Status=WAITING/ ) {
    $blast = "";
  }
  elsif ( $blast =~ /\s+Status=UNKNOWN/ ) {
    print "Error in processing\nRID $rid\n";
    exit;
  }
  else {
    $verbose && print "got blast result\n";
    $verbose && print "retrieving taxonomy data\n";
    $url = $qblastURL . "CMD=Get&RID=$rid&FORMAT_OBJECT=TaxBlast&NCBI_GI=on";
    $req = HTTP::Request->new( GET => $url );
    $taxonomy = $ua->request($req)->content;
    $taxonomy = "" if ( $taxonomy =~ /No valid taxids found in the alignment/ );
  }
  return ( $blast, $taxonomy );
}

sub saveToFile {
  my ( $data, $file ) = @_;
  local (*OUT);
  open( OUT, ">$file" );
  print OUT $data;
  close OUT;
}

sub getSequence {
  return qq{
AAAGGATTTATTGACGATGCGAACTACTCCGTTGGCCTGTTGGATGAAGGAACAAA
CCTTGGAAATGTTATTGATAACTATGTTTATGAACATACCCTGACAGGAAAAAATGCAT
TTTTTGTGGGGGATCTTGGGAAGATCGTGAAGAAGCACAGTCAGTGGCAGACCGTGGTG
GCTCAGATAAAGCCGTTTTACACGGTGAAGTGCAACTCCACTCCAGCCGTGCTTGAGAT
CTTGGCAGCTCTTGGAACTGGGTTTGCTTGTTCCAGCAAAAATGAAATGGCTTTAGTGC
AAGAATTGGGTGTATCTCCAGAAAACATCATTTTCACAAGTCCTTGTAAGCAAGTGTCT
CAGATAAAGTATGCAGCAAAAGTTGGAGTAAATATTATGACATGTGACAATGAGATTGA
ATTAAAGAAAATTGCAAGGAATCACCCAAATGCCAAGGTCTTACTACATATTGCAACAG
AAGATAATATTGGAGGTGAAGATGGTAACATGAAGTTTGGCACTACACTGAAGAATTGT
AGGCATCTTTTGGAATGTGCCAAGGAACTTGATGTCCAAATAATTGGGGTTAAATTTCA
TGTTTCAAGTGCTTGCAAAGAATATCAAGTATATGTACATGCCCTGTCTGATGCTCGAT
GTGTGTTTGACATGGCTGGAGAGTTTGGCTTTACAATGAACATGTTAGACATCGGTGGA
GGCTTCACAGGAACTGAAATTCAGTTGGAAGAGGTTAATCATGTTATCAGTCCTCTGTT
GGATATTTACTTCCCTGAAGGATCTGGCATTCAGATAATTTCAGAACCTGGAAGCTACT
ATGTATCTTCTGCGTTTACACTTGCAGTCAATATTATTGCTAAGAAAGTTGTTGAAAAT
GATAAATTTTCCTCTGGAGTAGAAAAAAATGGGAGTGATGAGCCAGCCTTCGTGTATTA
CATGAATGATGGTGTTTATGGTTCTTTTGCGAGTAAGCTTTCTGAGGACTTAAATACCA
TTCCAGAGGTTCACAAGAAATACAAGGAAGATGAGCCTCTGTTTACAAGCAGCCTTTGG
GGTCCATCCTGTGATGAGCTTGATCAAATTGTGGAAAGCTGTCTTCTTCCTGAGCTGAA
TGTGGGAGATTGGCTTATCTTTGATAACATGGGAGCAGATTCTTTCCACGAACCATCTG
CTTTTAATGATTTTCAGAGGCCAGCTATTTATTTCATGATGTCATTCAGTGATTGGTAT
GAGATGCAAGATGCTGGAATTACTTCAGATGCAATGATGAAAAACTTCTTCTTTGCACC
CTCTTGTATTCAGCTGAGCCAAGAAGACAGCTTTTCCACTGAAGCT};
}

================================

> -----Original Message-----
> From: bioperl-l-bounces at lists.open-bio.org [mailto:bioperl-l-
> bounces at lists.open-bio.org] On Behalf Of Smithies, Russell
> Sent: Tuesday, 22 September 2009 10:48 a.m.
> To: 'bill at genenformics.com'
> Cc: 'bioperl-l at lists.open-bio.org'; 'armendarez77 at hotmail.com'
> Subject: Re: [Bioperl-l] New to Bio::Tools::Run::RemoteBlast
> 
> That doesn't work with remote databases though.
> B:T:R:RemoteBlast uses the QBlast API (I think) so you're limited to the
> prebuilt databases NCBI offers.
> http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html
> 
> Another thing to try is space-seperating your db list - I know it works with
> local blasts.
> You could also bypass RemoteBlast and do it yourself by POSTing via URL.
> 
> This seems to work with multiple databases but you'd need to experiment:
> 
> http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=257700677&DATABASE=%22Microb
> ial/100226%20Microbial/101510%20Microbial/103690%22&HITLIST_SIZE=10&FILTER=L&E
> XPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=plain&NCBI_GI=on&P
> AGE=Nucleotides&CMD=Put
> 
> 
> --Russell
> 
> 
> > -----Original Message-----
> > From: bill at genenformics.com [mailto:bill at genenformics.com]
> > Sent: Tuesday, 22 September 2009 10:21 a.m.
> > To: Smithies, Russell
> > Cc: 'armendarez77 at hotmail.com'; 'bioperl-l at lists.open-bio.org'
> > Subject: Re: [Bioperl-l] New to Bio::Tools::Run::RemoteBlast
> >
> > BLAST DBs can be concatenated into a single target (.nal or .pal) file.
> >
> > Check this out:
> >
> > http://www.ncbi.nlm.nih.gov/Web/Newsltr/Winter00/blastlab.html
> >
> > Bill
> >
> > > You may need to setup blast locally (not a big job) as I don't think you
> > > can blast against multiple databases with B:T:R:RemoteBlast.
> > > Or you could do it manually on NCBI's site where you can filter results by
> > > entrez query (eg. 1239[taxid] for fermicutes)
> > > http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml#entrez_query
> > >
> > > --Russell
> > >
> > >
> > >> -----Original Message-----
> > >> From: bioperl-l-bounces at lists.open-bio.org [mailto:bioperl-l-
> > >> bounces at lists.open-bio.org] On Behalf Of armendarez77 at hotmail.com
> > >> Sent: Tuesday, 22 September 2009 9:01 a.m.
> > >> To: bioperl-l at lists.open-bio.org
> > >> Subject: [Bioperl-l] New to Bio::Tools::Run::RemoteBlast
> > >>
> > >>
> > >>
> > >>
> > >>
> > >>
> > >>
> > >> Hello,
> > >>
> > >> Is there a function to blast one query sequence against multiple blast
> > >> databases?  For example, I want to blast a sequence against all
> > >> Microbial
> > >> Genomes.  Currently, I can do it by placing multiple Microbial databases
> > >> (eg.
> > >> Microbial/100226, Microbial/101510, etc) into an array and iterate
> > >> through
> > >> them using a foreach loop.  Each individual database is placed in the
> > >> '-data'
> > >> parameter and the blast is performed.
> > >>
> > >> Example Code:
> > >>
> > >> use strict;
> > >> use Bio::Tools::Run::RemoteBlast;
> > >>
> > >> my @microbDbs = qw(Microbial/100226 Microbial/101510 Microbial/103690
> > >> Microbial/1063);
> > >> my $e_val= '1e-3';
> > >>
> > >> foreach my $db(@microbDbs){
> > >>   my @params = ( '-prog' => $prog,
> > >>                          '-data' => $db,
> > >>                          '-expect' => $e_val,
> > >>                          '-readmethod' => 'xml' );
> > >>
> > >>   my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
> > >>   my $v = 1;
> > >>   my $str = Bio::SeqIO->new(-file=>'test.fa' , '-format' => 'fasta' );
> > >>   while (my $input = $str->next_seq()){
> > >>     my $r = $factory->submit_blast($input);
> > >>
> > >>     #Code continues...
> > >>
> > >> }
> > >>
> > >> Is there a more efficient way to accomplish this?
> > >>
> > >> If this topic has been discussed please point the way.
> > >>
> > >> Thank you,
> > >>
> > >> Veronica
> > >>
> > >>
> > >> _________________________________________________________________
> > >> Microsoft brings you a new way to search the web.  Try  Bing(tm) now
> > >>
> >
> http://www.bing.com?form=MFEHPG&publ=WLHMTAG&crea=TEXT_MFEHPG_Core_tagline_try
> > >> bing_1x1
> > >> _______________________________________________
> > >> Bioperl-l mailing list
> > >> Bioperl-l at lists.open-bio.org
> > >> http://lists.open-bio.org/mailman/listinfo/bioperl-l
> > > =======================================================================
> > > Attention: The information contained in this message and/or attachments
> > > from AgResearch Limited is intended only for the persons or entities
> > > to which it is addressed and may contain confidential and/or privileged
> > > material. Any review, retransmission, dissemination or other use of, or
> > > taking of any action in reliance upon, this information by persons or
> > > entities other than the intended recipients is prohibited by AgResearch
> > > Limited. If you have received this message in error, please notify the
> > > sender immediately.
> > > =======================================================================
> > >
> > > _______________________________________________
> > > Bioperl-l mailing list
> > > Bioperl-l at lists.open-bio.org
> > > http://lists.open-bio.org/mailman/listinfo/bioperl-l
> > >
> >
> 
> 
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/bioperl-l




More information about the Bioperl-l mailing list