[Bioperl-l] wu-blast and bioperl

Jason Stajich jason.stajich at duke.edu
Mon May 23 20:38:55 EDT 2005


it is still not finding wublast hence your message, you don't want an  
equivalenced next_result method, you want to tell the module how to  
find the executable it needs.

I think you need to set the WUBLAST variable after the use  
Bio::Tools::Run::StandAloneBlast.

I'm not really looking at the rest of the code below though so I  
don't know if you've made other mistakes.
You can always just write a simple script to test that the basics work
use strict;
use Bio::Tools::Run::StandAloneBlast;
$ENV{WUBLASTDIR} = '/usr/local/WU-BLAST2.0';

my $factory = Bio::Tools::Run::StandAloneBlast->new();
print $factory->program_path();

-jason
On May 23, 2005, at 6:26 PM, Tuan A. Tran wrote:

> Hi,
>
> I tried to run wublast with bioperl. However, I got an error
>
> -------------------- WARNING ---------------------
> MSG: cannot find path to wublast
> ---------------------------------------------------
> Can't call method "next_result" on an undefined value at  
> testanno.pl line 225,
> <GEN0> line 1.
>
> Is there any method which is equivalent to "next_result" method? I
> really appreciate if anyone can help.
>
> My code lousy code is quoted below. I tested it with blastall from
> ncbi. It works fine.
> I am using debian sarge and I have wu-blast install in
> /usr/local/WU-BLAST2.0. Furthermore, I also installed new bioperl-1.5.
>
> Thanks in advance
> TAT
>
>
>
>
> =================
> #!/usr/local/lib/perl -w
>
> BEGIN {$ENV{WUBLASTDIR} = '/usr/local/WU-BLAST2.0/';};
>
> use Bio::Seq;
> use Bio::SeqIO;
> use Bio::SearchIO;
> use Bio::AlignIO;
> use Bio::SimpleAlign;
> use Bio::LocatableSeq;
> use Bio::Tools::Run::StandAloneBlast;
> use Getopt::Long;
> use Bio::DB::GenBank;
> use Bio::DB::Flat::BDB;
> #use Bio::Index::GenBank;
> use Bio::Index::Fasta;
> use Bio::SeqFeature::Generic;
> use DBI;
>
>
> #########
> # Declare database names
> #########
>
> my $db_core_fly = 'fly_core';
>
> ############
> #
> #  Define variables for tables in database  
> drosophila_melanogaster_core_30_3d
> #
> ###########
>
> my $tattrib = 'attrib_type';
> my $tseq_region = 'seq_region';
> my $tseq_region_attrib = 'seq_region_attrib';
> my $texon = 'exon';
> my $texon_stable_id = 'exon_stable_id';
> my $texon_transcript = 'exon_transcript';
> my $tgene = 'gene';
> my $tgene_stable_id = 'gene_stable_id';
> my $ttranscript = 'transcript';
> my $ttranscript_stable_id = 'transcript_stable_id';
> my $txref='xref';
> my $texternal_db = 'external_db';
> my $texternal_synonym = 'external_synonym';
>
> ###########
> # This subroutine is to tell Perl to communicate with my MySQL  
> database
> # Source: got it after googling
> ###########
>
> sub start_InputDB {
>    my $database = $_[0];
>    use DBI;
>    my $DBD = 'mysql';
>    my $host ='localhost';
>    my $user = 'tuan';
>    my $passwd = '';
>    my $inputdb = DBI->connect("DBI:$DBD:$database:$host","$user","",
>                                    {RaiseError => 1, AutoCommit =>  
> 1});
>    return $inputdb;
> }
>
> ######################
>
> sub feature2string {
>    my $f = shift;
>    #my $stable_id = $f->stable_id();
>    my $seq_region = $f->seq_region_name();
>    my $start = $f->start();
>    my $end = $f->end();
>    my $strand = $f->strand();
>    my $seq = $f->seq();
>    return ">$seq_region:$start - $end ($strand)\n$seq";
> }
>
> sub getGene {
>    my $f = shift;
>    #my $stable_id = $f->stable_id();
>    my $seq_region = $f->seq_region_name();
>    my $start = $f->start();
>    my $end = $f->end();
>    my $strand = $f->strand();
>    my $seq = $f->seq();
>    return ">$seq_region:$start - $end ($strand)\n$seq";
> }
>
> ###################################################################### 
> ######
> # Change number of organisms according to your problem
> # BE SURE TO CHANGE AS EXAMPLE BELOW
> ###################################################################### 
> ######
>
> my (@organism,$org_idx);
>
> my $max_number_organism = 3;
>
> @organism = ('fly');
>
> my $fly_dna = 'Drosophila_melanogaster.BDGP3.2.1.may.dna_rm.';
>
> my %fly_chromosome = (
>                 fly => [$fly_dna.$org_chro.'.2h',$fly_dna. 
> $org_chro.'.2L',
> $fly_dna.$org_chro.'.2R',$fly_dna.$org_chro.'.3h',$fly_dna. 
> $org_chro.'.3L',
> $fly_dna.$org_chro.'.3R',$fly_dna.$org_chro.'.4', $fly_dna. 
> $org_chro.'.4h',
> $fly_dna.$org_chro.'.U',$fly_dna.$org_chro.'.X', $fly_dna. 
> $org_chro.'.Xh',
>             $fly_dna.$org_chro.'.Yh']);
>
> my %fly_mysqldb = ( fly => ['fly_core']);
>
>
>
> ###################################################################### 
> ######
>
> my $meta='local';
>
> my $infile = shift;
>
> my $in  = Bio::SeqIO->new ( -file   => $infile,
>                    -format => 'fasta' );
>
> my $prefix="summary_";
> my $sumout = $prefix . $infile;
>
> $prefix="out_";
> $allout = $prefix . $infile;
>
> $datastatistics = "stat_" . $infile . ".dat";
>
>
> my $out = Bio::SeqIO->new ( -file   => '>seq_out.fa',
>                   -format => 'fasta' );
> my $fout = Bio::SeqIO->new(-fh => \*STDOUT , -format => 'fasta');
>
> #1111111111
> while(my $query=$in->next_seq())
> {
>
>    print ALLOUT ">",$query->id," ",$query->desc;
>    print ALLOUT "\t",$query->seq,"\n";
>    print SUMOUT ">",$query->id," ",$query->desc;
>    print SUMOUT "\t",$query->seq,"\n";
> #   print ">",$query->id," ",$query->desc;
> #   print "\t",$query->seq,"\n";
>
> #2222222222
> foreach $organism (@organism)
> {
>     my $dir = '/Crick/' . $organism . '/';
>
>
> #########################################
> # Go through each mysql database (mostly core and est)
> ########################################
> #333333333333
> foreach my $each_db (@{$fly_mysqldb{$organism}})
> {
>    my $inputdb = &start_InputDB($each_db);
>
> #######################################
> # Go through each chromosome of a species
> #######################################
> #4444444444444
> foreach my $org_chro (@{$fly_chromosome{$organism}})
> {
>
>     # database for blast
>     my($local_blastdb,$db,$fmt);
>     $local_blastdb = $dir . 'dna/' . $org_chro . '.fa';
>
> #print "is it here ", $local_blastdb, "\n";
>
>     $db = $dir . 'dna/' . $organism . '.index';
>     $fmt='Fasta';
>  my $blastout = "blast.out." . $infile;
>
>     my @param = ('program'  => 'wublastn','database' =>  
> $local_blastdb,
>              'E'=>10,'o'=>$blastout);
>     my $factory = Bio::Tools::Run::StandAloneBlast->new(@param,
> _READMETHOD => "Blast");
>     $wublast_path = $factory->program_path();
>     print $wublast_path, "\n";
>     $factory->q(-10000.0);
>     $factory->g(F);
>
> #my $blastout = "blast.out." . $infile . $organisms . "." . $org_chro
> . "." . $each_db;
>       my $blast_report = $factory->wublast($query);
>
>     ##########
>     # Look at every hits in blast output
>     #
>     ###########
>    #55555555555
>       while(my $result = $blast_report->next_result)
>      {
> print "number of hits: ",$result->num_hits,"\n";
>
>          if($result->num_hits > 0)
>          {
>
>             my $match_count=0;
>             my $hit_count=0;
>             my @myhits =$result->hits();
>
>             foreach my $myhit (@myhits)
>             {
>                # Count the number of high scoring pair
>                my $hsp_count=0;
>                  my @hsps = $myhit->hsps();
>
>                  foreach my $hsp (@hsps)
>                  {
> print "hsp_length:", $hsp->length, " and query_length:";
> print $query->length, "\n";
>
>                     #########
>                     # Counting the number of hits with the same  
> length as query sequence
>                     ########
>                     if($hsp->length eq $query->length)
>                     {
>                        $match_count++;
>                         $hsp_count++;
>                     }
>                  } # end of foreach my $hsp (@hsps)
>
>             } # end of foreach my $myhit (@myhits)
>
>             ##########
>              # Pick the top 4 of high scoring pair
>              ###########
>              if($match_count>0 && $match_count <= 4)
>              {
>
>                 my $temp_organism;
>                 if($temp_organism ne $organism) {
> print        $organism,": ",$match_count," full length hsps\n";
>                 }
>                 $temp_organism = $organism;
>
>                  foreach my $hit (@myhits)
>                 {
>                   my $hsp_count=0;
>                   my @hsps = $hit->hsps();
>                   foreach my $hsp (@hsps)
>                   {
>                     # if the length of the high scoring pair is  
> equal to
>                     # the length of input sequence, then this is a  
> good hit
>                     if($hsp->length eq $query->length)
>                         {
>                             $hsp_count++;
>                         }
>                 }
>
> print "hsp_count: $hsp_count.\n";
>
>                   if($hsp_count!=0)
>                   {
>
> print $hit->name, " : $hsp_count full length hsps\n";
>                   }
>
>                   if($hsp_count>0 )
>                   {
>                     my $id = $hit->name;
>                     my($dbobj,$seqio);
>
>
>                     foreach my $hsp (@hsps)
>                     {
>                           $hsp_start = $hsp->query->start;
>                           my $start = $hsp->hit->start(); my $end =  
> $hsp->hit->end();
>                           if($hsp->length eq $query->length)
>                           {
> print "HIT: ",$hsp->hit_string,"  ",$hit->name;
>
>                            my $flank_start=$start-100;
>                         if($flank_start<1)
>                         {
>                               $flank_start=1;
>                         }
>
>                         my $flank_end=$end+100;
>                         if($flank_end>$hit->length)
>                         {
>                               $flank_end=$hit->length;
>                         }
>                         my $flank_len=$flank_start-$flank_end+1;
>
>
>
>
> ############################################################
>
>                             } #end of if($hsp->length eq $query- 
> >length)
>
>                             } # end of foreach my $hsp (@hsps)
>                         } # end of if($hsp_count>0 && $hsp_count<4)
>                     } # end of foreach my $hit (@myhits)
>                 } # end of if($match_count>0 && $match_count<4)
>             } # end of if($result->num_hits>0)
>
>    #55555555555
>       } # end of while(my $result = $blast_report->next_result)
>    #55555555555
>
> #444444444444
> } # end of for $each_chromosome (keys %insect_chromosome)
> #444444444444
>
>
> #333333333333
> } # end of foreach my $each_db (@{$insect_mysqldb{$organism}})
> #333333333333
>
> #2222222222
> } # end of for($org_idx=0; $org_idx < max_number_org; org_idx++)
> #2222222222
>
> #1111111111
> } # end of while(my $query=$in->next_seq())
> #1111111111
>
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l at portal.open-bio.org
> http://portal.open-bio.org/mailman/listinfo/bioperl-l
>

--
Jason Stajich
Duke University
http://www.duke.edu/~jes12/




More information about the Bioperl-l mailing list