[Bioperl-l] Regarding blast in Bioperl
Mark A. Jensen
maj at fortinbras.us
Sat Jan 9 13:05:41 EST 2010
I see it immediately (from making same bug many times) :
my $factory = Bio::Tools::Run::RemoteBlast->new(@params, -ENTREZ_QUERY =>
- '$organ[ORGN]');
+"$organ[ORGN]");
MAJ
----- Original Message -----
From: "Roopa Raghuveer" <rtbio.2009 at gmail.com>
To: "Mark A. Jensen" <maj at fortinbras.us>
Cc: <bioperl-l at lists.open-bio.org>
Sent: Saturday, January 09, 2010 11:57 AM
Subject: Re: [Bioperl-l] Regarding blast in Bioperl
> Hello all,
>
> Thanks alot for your reply Mark. It was working for Trypanosoma brucei as
> the organism parameter,but when I tried to use the Organism parameter from
> the user,it was not working i.e., I was unable to get the target sequences.
> Please help me in this regard. My code is
>
> #!/usr/bin/perl
>
> #path for extra camel module
> use lib "/srv/www/htdocs/rain/RNAi/";
> use Roopablast;
>
>
> use Bio::SearchIO;
> use Bio::Search::Result::BlastResult;
> use Bio::Perl;
> use Bio::Tools::Run::RemoteBlast;
> use Bio::Seq;
> use Bio::SeqIO;
> use Bio::DB::GenBank;
>
> $serverpath = "/srv/www/htdocs/rain/RNAi";
> $serverurl = "http://141.84.66.66/rain/RNAi";
> $outfile = $serverpath."/rnairesult_".time().".html";
> $nuc = $serverpath."/nuc".time().".txt";
> $debugfile = $serverpath."/debug_".time().".txt";
> $blastdebugfile = $serverpath."/blastdebug_".time().".txt";
>
> my $outstring ="";
>
> &parse_form;
>
> print "Content-type: text/html\n\n";
> print "<HTML>\n";
> print "<head><title>RNAi Result</title>";
> print "<META HTTP-EQUIV=\"Refresh\" CONTENT=\"30;
> URL=$serverurl/rnairesult_".time().".html\"> \n";
> print "</head>\n";
> print "<body>\n";
> print " Your results will appear <a
> href=$serverurl/rnairesult_".time().".html>here</a><br>";
> print " Please be patient, runtime can be up to 5 minutes<br>";
> print " This page will automatically reload in 30 seconds. Roopa";
> print "</BODY>\n";
> print "</HTML>\n";
>
> defined(my $pid = fork) or die "Can't fork: $!";
> exit if $pid;
> open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
> open STDOUT, '>/dev/null' or die "Can't write to /dev/null: $!";
> open STDERR, '>&STDOUT' or die "Can't dup stdout: $!";
>
> open(OUTFILE, '>',$outfile);
>
> print OUTFILE "<HTML>\n
> <head><title>RNAi Result</title>
> <META HTTP-EQUIV=\"Refresh\" CONTENT=\"30;
> URL=$serverurl//rnairesult_".time().".html\"> \n
> <meta http-equiv=\"expires\" content=\"0\">
> </head>\n
> <body>\n
> Your results will appear <a
> href=$serverurl/rnairesult_".time().".html>here</a><br>
> Please be patient, runtime can be up to 5 minutes wait wait wait......<br>
> This page will automatically reload in 30 seconds Roopa <br>
> </BODY>\n
> </HTML>\n";
>
> close(OUTFILE);
>
>
> @compseqs = blastcode($in{'Inputseq'},$in{'Organism'});
>
> $in{'Inputseq'} =~ s/>.*$//m;
> $in{'Inputseq'} =~ s/[^TAGC]//gim;
> $in{'Inputseq'} =~ tr/actg/ACTG/;
>
> @out = similar($in{'Inputseq'}, \@compseqs, $in{'Windowsize'},
> $in{'Threshold'});
>
>
> sub blastcode
> {
>
> $inpu1= $_[0];
>
> $organ= $_[1];
>
> open(NUC,'>',$nuc);
> print NUC $inpu1,"\n";
> close(NUC);
>
> my $prog = 'blastn';
> my $db = 'refseq_rna';
> my $e_val= '1e-10';
> my $organism= $organ;
>
> $gb = new Bio::DB::GenBank;
>
> my @params = ( '-prog' => $prog,
> '-data' => $db,
> '-expect' => $e_val,
> '-readmethod' => 'SearchIO',
> '-Organism' => $organism );
>
> open(OUTFILE,'>',$debugfile);
> print OUTFILE $inpu1;
> close(OUTFILE);
>
>
> my $factory = Bio::Tools::Run::RemoteBlast->new(@params, -ENTREZ_QUERY =>
> '$organ[ORGN]');
>
> #my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
>
> #change a paramter
>
> #$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Trypanosoma
> Brucei[ORGN]';
>
> #change a paramter
> # $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = '$input2[ORGN]';
>
> my $v = 1;
> #$v is just to turn on and off the messages
>
> my $str = Bio::SeqIO->new('-file' => $nuc , '-format' => 'fasta' ,
> '-organism' => $organ );
>
>
> while (my $input = $str->next_seq())
> {
> #Blast a sequence against a database:
> #Alternatively, you could pass in a file with many
> #sequences rather than loop through sequence one at a time
> #Remove the loop starting 'while (my $input = $str->next_seq())'
> #and swap the two lines below for an example of that.
>
> #open(OUTFILE,'>',$debugfile);
> # print OUTFILE $input;
> #close(OUTFILE);
>
>
> my $r = $factory->submit_blast($input);
>
> open(OUTFILE,'>',$debugfile);
> # print OUTFILE $r;
> close(OUTFILE);
>
> print STDERR "waiting...." if($v>0);
>
> while ( my @rids = $factory->each_rid ) {
> # open(OUTFILE,'>',$debugfile);
> # print OUTFILE "while entered";
> # close(OUTFILE);
> foreach my $rid ( @rids ) {
>
> # open(OUTFILE,'>',$debugfile);
> # print OUTFILE "foreach entered";
> # close(OUTFILE);
>
> my $rc = $factory->retrieve_blast($rid);
>
> if( !ref($rc) )
> {
> if( $rc < 0 )
> {
> $factory->remove_rid($rid);
> }
> open(OUTFILE,'>',$debugfile);
> # print OUTFILE "if entered";
> close(OUTFILE);
> print STDERR "." if ( $v > 0 );
> sleep 5;
> }
> else {
> # open(OUTFILE,'>',$debugfile);
> # print OUTFILE "else entered";
> # close(OUTFILE);
>
> my $result = $rc->next_result();
> #save the output
> $blastdebugfile = $serverpath."/blastdebug_".time().".txt";
>
> open(BLASTDEBUGFILE,'>',$blastdebugfile);
> print BLASTDEBUGFILE $result->next_hit();
> close(BLASTDEBUGFILE);
>
> my $filename =
> $serverpath."/blastdata_".time().$result->query_name()."\.out";
>
> # open(DEBUGFILE,'>',$debugfile);
> # open(new,'>',$filename);
> # @arra=<new>;
> # print DEBUGFILE @arra;
> # close(DEBUGFILE);
> # close(new);
>
> $factory->save_output($filename);
> # open(BLASTDEBUGFILE,'>',$debugfile);
> # print BLASTDEBUGFILE "Hello $rid";
> # close(BLASTDEBUGFILE);
>
> $factory->remove_rid($rid);
>
> open(BLASTDEBUGFILE,'>',$blastdebugfile);
> print BLASTDEBUGFILE $organism;
> close(BLASTDEBUGFILE);
>
> # open(OUTFILE,'>',$outfile);
> # print OUTFILE "Test2 $result->database_name()";
> # close(OUTFILE);
>
> #$hit = $result->next_hit;
> #open(new,'>',$debugfile);
> #print $hit;
> #close(new);
>
> while ( my $hit = $result->next_hit ) {
>
> next unless ( $v > 0);
>
> # open(OUTFILE,'>',$debugfile);
> # print OUTFILE "$hit in while hits";
> # close(OUTFILE);
>
> my $sequ = $gb->get_Seq_by_version($hit->name);
> my $dna = $sequ->seq(); # get the sequence as a string
> push(@seqs,$dna);
> }
> }
> }
> }
> }
>
> #open(OUTFILE,'>',$debugfile);
> #print OUTFILE $seqs[0];
> #close(OUTFILE);
>
> return(@seqs);
>
> }
>
> Regards,
> Roopa.
>
>
> On Fri, Jan 8, 2010 at 4:36 PM, Mark A. Jensen <maj at fortinbras.us> wrote:
>
>> Hi Roopa--
>>
>> I got your code to work with the following changes:
>>
>> +# the input should be a valid FASTA file...
>> ...
>> open(NUC,'>',$nuc);
>> +print NUC ">seq (need a name line for valid fasta)\n";
>> print NUC $inpu1, "\n";
>> close(NUC);
>> ...
>>
>> +# you can set these header parms in the call itself...
>> - my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
>> + my $factory = Bio::Tools::Run::RemoteBlast->new(@params, -ENTREZ_QUERY =>
>> ''Trypanosoma Brucei[ORGN]');
>>
>> #change a paramter
>> +# commented this out...
>> +# $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Trypanosoma
>> Brucei[ORGN]';
>>
>> MAJ
>> ----- Original Message ----- From: "Roopa Raghuveer" <rtbio.2009 at gmail.com
>> >
>> To: <bioperl-l at lists.open-bio.org>
>> Sent: Friday, January 08, 2010 10:00 AM
>> Subject: [Bioperl-l] Regarding blast in Bioperl
>>
>>
>> Hello all,
>>>
>>> I was trying Remote blast using Bioperl. My input data is a Trypanosoma
>>> brucei sequence in Fasta format. When I was trying to submit to BLAST
>>> using
>>> the step
>>> $r=$factory->submit_blast($input)
>>> It was not returning anything which I checked by debugging the code. It is
>>> not blasting my input sequence even though I mentioned all the
>>> parameters.I
>>> would paste the code below.
>>>
>>> Please help me in solving put this problem. It is very urgent.
>>>
>>> Regards
>>> Roopa.
>>>
>>> #!/usr/bin/perl
>>>
>>> #path for extra camel module
>>> use lib "/srv/www/htdocs/rain/RNAi/";
>>> use Roopablast;
>>>
>>>
>>> use Bio::SearchIO;
>>> use Bio::Search::Result::BlastResult;
>>> use Bio::Perl;
>>> use Bio::Tools::Run::RemoteBlast;
>>> use Bio::Seq;
>>> use Bio::SeqIO;
>>> use Bio::DB::GenBank;
>>>
>>> $serverpath = "/srv/www/htdocs/rain/RNAi";
>>> $serverurl = "http://141.84.66.66/rain/RNAi";
>>> $outfile = $serverpath."/rnairesult_".time().".html";
>>> $nuc = $serverpath."/nuc".time().".txt";
>>> $debugfile = $serverpath."/debug_".time().".txt";
>>> $blastdebugfile = $serverpath."/blastdebug_".time().".txt";
>>>
>>> my $outstring ="";
>>>
>>> &parse_form;
>>>
>>> print "Content-type: text/html\n\n";
>>> print "<HTML>\n";
>>> print "<head><title>RNAi Result</title>";
>>> print "<META HTTP-EQUIV=\"Refresh\" CONTENT=\"30;
>>> URL=$serverurl/rnairesult_".time().".html\"> \n";
>>> print "</head>\n";
>>> print "<body>\n";
>>> print " Your results will appear <a
>>> href=$serverurl/rnairesult_".time().".html>here</a><br>";
>>> print " Please be patient, runtime can be up to 5 minutes<br>";
>>> print " This page will automatically reload in 30 seconds. Roopa";
>>> print "</BODY>\n";
>>> print "</HTML>\n";
>>>
>>> defined(my $pid = fork) or die "Can't fork: $!";
>>> exit if $pid;
>>> open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
>>> open STDOUT, '>/dev/null' or die "Can't write to /dev/null: $!";
>>> open STDERR, '>&STDOUT' or die "Can't dup stdout: $!";
>>>
>>>
>>>
>>> open(OUTFILE, '>',$outfile);
>>>
>>> print OUTFILE "<HTML>\n
>>> <head><title>RNAi Result</title>
>>> <META HTTP-EQUIV=\"Refresh\" CONTENT=\"30;
>>> URL=$serverurl//rnairesult_".time().".html\"> \n
>>> <meta http-equiv=\"expires\" content=\"0\">
>>> </head>\n
>>> <body>\n
>>> Your results will appear <a
>>> href=$serverurl/rnairesult_".time().".html>here</a><br>
>>> Please be patient, runtime can be up to 5 minutes wait wait
>>> wait......<br>
>>> This page will automatically reload in 30 seconds Roopa <br>
>>> </BODY>\n
>>> </HTML>\n";
>>>
>>> close(OUTFILE);
>>>
>>>
>>> @compseqs = blastcode($in{'Inputseq'});
>>>
>>> $in{'Inputseq'} =~ s/>.*$//m;
>>> $in{'Inputseq'} =~ s/[^TAGC]//gim;
>>> $in{'Inputseq'} =~ tr/actg/ACTG/;
>>>
>>> @out = similar($in{'Inputseq'}, \@compseqs, $in{'Windowsize'},
>>> $in{'Threshold'});
>>>
>>>
>>> sub blastcode
>>> {
>>>
>>> $inpu1= $_[0];
>>>
>>> #$organ= $_[1];
>>>
>>> open(NUC,'>',$nuc);
>>> print NUC $inpu1;
>>> close(NUC);
>>>
>>> my $prog = 'blastn';
>>> my $db = 'refseq_rna';
>>> my $e_val= '1e-10';
>>> my $organism= 'Trypanosoma Brucei';
>>>
>>> $gb = new Bio::DB::GenBank;
>>>
>>> my @params = ( '-prog' => $prog,
>>> '-data' => $db,
>>> '-expect' => $e_val,
>>> '-readmethod' => 'SearchIO',
>>> '-Organism' => $organism );
>>>
>>> # open(OUTFILE,'>',$debugfile);
>>> # print OUTFILE @params;
>>> # close(OUTFILE);
>>>
>>>
>>> my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
>>>
>>> #change a paramter
>>>
>>> $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Trypanosoma
>>> Brucei[ORGN]';
>>>
>>> #change a paramter
>>> # $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = '$input2[ORGN]';
>>>
>>> my $v = 1;
>>> #$v is just to turn on and off the messages
>>>
>>> my $str = Bio::SeqIO->new('-file' => $nuc , '-format' => 'fasta' ,
>>> '-organism' => 'Trypanosoma Brucei' );
>>>
>>>
>>> while (my $input = $str->next_seq())
>>> {
>>> #Blast a sequence against a database:
>>> #Alternatively, you could pass in a file with many
>>> #sequences rather than loop through sequence one at a time
>>> #Remove the loop starting 'while (my $input = $str->next_seq())'
>>> #and swap the two lines below for an example of that.
>>>
>>> open(OUTFILE,'>',$debugfile);
>>> print OUTFILE $input;
>>> close(OUTFILE);
>>>
>>>
>>> my $r = $factory->submit_blast($input); #The program stops here it
>>> does not return any value and it does not enter the While loop,Please help
>>> me in this regard.#
>>> open(OUTFILE,'>',$debugfile);
>>> print OUTFILE $r;
>>> close(OUTFILE);
>>>
>>>
>>> print STDERR "waiting...." if($v>0);
>>>
>>> while ( my @rids = $factory->each_rid ) {
>>> open(OUTFILE,'>',$debugfile);
>>> print OUTFILE "while entered";
>>> close(OUTFILE);
>>> foreach my $rid ( @rids ) {
>>>
>>> open(OUTFILE,'>',$debugfile);
>>> print OUTFILE "foreach entered";
>>> close(OUTFILE);
>>>
>>> my $rc = $factory->retrieve_blast($rid);
>>>
>>> if( !ref($rc) )
>>> {
>>> if( $rc < 0 )
>>> {
>>> $factory->remove_rid($rid);
>>> }
>>> open(OUTFILE,'>',$debugfile);
>>> print OUTFILE "if entered";
>>> close(OUTFILE);
>>> print STDERR "." if ( $v > 0 );
>>> sleep 5;
>>> }
>>> else {
>>> open(OUTFILE,'>',$debugfile);
>>> print OUTFILE "else entered";
>>> close(OUTFILE);
>>>
>>> my $result = $rc->next_result();
>>> #save the output
>>> $blastdebugfile = $serverpath."/blastdebug_".time().".txt";
>>>
>>> open(BLASTDEBUGFILE,'>',$blastdebugfile);
>>> print BLASTDEBUGFILE $result->next_hit();
>>> close(BLASTDEBUGFILE);
>>>
>>> my $filename =
>>> $serverpath."/blastdata_".time().$result->query_name()."\.out";
>>>
>>> # open(DEBUGFILE,'>',$debugfile);
>>> # open(new,'>',$filename);
>>> # @arra=<new>;
>>> # print DEBUGFILE @arra;
>>> # close(DEBUGFILE);
>>> # close(new);
>>>
>>> $factory->save_output($filename);
>>>
>>> # open(BLASTDEBUGFILE,'>',$debugfile);
>>> # print BLASTDEBUGFILE "Hello $rid";
>>> # close(BLASTDEBUGFILE);
>>>
>>> $factory->remove_rid($rid);
>>>
>>> open(BLASTDEBUGFILE,'>',$blastdebugfile);
>>> print BLASTDEBUGFILE $organism;
>>> close(BLASTDEBUGFILE);
>>>
>>> # open(OUTFILE,'>',$outfile);
>>> # print OUTFILE "Test2 $result->database_name()";
>>> # close(OUTFILE);
>>>
>>> #$hit = $result->next_hit;
>>> #open(new,'>',$debugfile);
>>> #print $hit;
>>> #close(new);
>>>
>>> while ( my $hit = $result->next_hit ) {
>>>
>>> next unless ( $v > 0);
>>>
>>> # open(OUTFILE,'>',$debugfile);
>>> # print OUTFILE "$hit in while hits";
>>> # close(OUTFILE);
>>>
>>> my $sequ = $gb->get_Seq_by_version($hit->name);
>>> my $dna = $sequ->seq(); # get the sequence as a string
>>> push(@seqs,$dna);
>>> }
>>> }
>>> }
>>> }
>>> }
>>>
>>> #open(OUTFILE,'>',$debugfile);
>>> #print OUTFILE $seqs[0];
>>> #close(OUTFILE);
>>>
>>> return(@seqs);
>>>
>>> }
>>>
>>> open(OUTFILE, '>',$outfile) || die ;
>>>
>>> print OUTFILE "<HTML>\n
>>> <head><title>RNAi Result</title>
>>> <meta http-equiv=\"expires\" content=\"0\"></head>\n
>>> <body>\n
>>> <p><font face=\"Courier, monospace font set\">
>>> Inputsequence: <br>";
>>>
>>> for ($i=0; $i<length ($in{'Inputseq'}); $i++) {
>>>
>>> print OUTFILE substr ($in{'Inputseq'}, $i, 1);
>>>
>>> if ( ($i+1)%10==0){
>>> print OUTFILE " ";
>>> }
>>> if ( ($i+1)%60==0){
>>> print OUTFILE "<br>\n";
>>> }
>>> }
>>>
>>>
>>>
>>> print OUTFILE "</font> <p>";
>>>
>>> $z=@compseqs;
>>>
>>> for($k=1;$k<$z;$k++) {
>>> print OUTFILE "<font face=\"Courier, monospace font set\"><p>Compare
>>> Sequence: <br>";
>>>
>>> for ($i=0; $i<length ($compseqs[$k]); $i++) {
>>>
>>> print OUTFILE substr ($compseqs[$k], $i, 1);
>>>
>>> if ( ($i+1)%10==0){
>>> print OUTFILE " ";
>>> }
>>> if ( ($i+1)%60==0){
>>> print OUTFILE "<br>\n";
>>> }
>>> }
>>> print OUTFILE "<p></font>";
>>> }
>>>
>>> print OUTFILE "<p>
>>> Window: <br>$in{'Windowsize'}
>>> <p>
>>> <p>
>>> Threshold: <br>$in{'Threshold'}
>>> <p>";
>>> my $j=0;
>>>
>>> for ($i=0; $i<length ($in{'Inputseq'}); $i++) {
>>>
>>> if ($i<=length ($in{'Inputseq'})-$in{'Windowsize'}){
>>> if ($out[$i]->{similar}<=$in{'Threshold'}){
>>> $j=$in{'Windowsize'};
>>> }
>>> $height=$out[$i]->{similar}*5;
>>> }
>>>
>>> if ($j>0) {
>>> print OUTFILE "<img src=\"$serverurl/blue.gif\" width=\"1\"
>>> height=\"5\">";
>>> $outstring .= "<font color=\"green\">".substr ($in{'Inputseq'}, $i,
>>> 1)."</font>";
>>> $j--;
>>> }
>>> else {
>>> print OUTFILE "<img src=\"$serverurl/red.gif\" width=\"1\"
>>> height=\"5\">";
>>> $outstring .= "<font color=\"red\">".substr ($in{'Inputseq'}, $i,
>>> 1)."</font>";
>>> }
>>>
>>> if ( ($i+1)%10==0){
>>> $outstring .= " ";
>>> }
>>> if ( ($i+1)%60==0){
>>> $outstring .= "<br>\n";
>>>
>>> }
>>> if ( ($i+1)%800==0){
>>> print OUTFILE "<br><br>\n";
>>>
>>> }
>>> }
>>>
>>> print OUTFILE "<br><br><font face=\"Courier, monospace font
>>> set\">$outstring</font>";
>>>
>>> #foreach (@out) {
>>> #print OUTFILE "<p>Sequence: $_->{sequence}: $_->{similar} matchs<p>";
>>> #if ($_->{similar}<=$in{'Threshold'}){
>>>
>>> # }
>>> #}
>>>
>>> print OUTFILE "</BODY>\n</HTML>\n";
>>>
>>> close OUTFILE;
>>>
>>> #nameprint();
>>>
>>> sub parse_form {
>>> local ($buffer, @pairs, $pair, $name, $value);
>>> # Read in text
>>> $ENV{'REQUEST_METHOD'} =~ tr/a-z/A-Z/;
>>> if ($ENV{'REQUEST_METHOD'} eq "POST")
>>> {
>>> read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
>>> }
>>> else
>>> {
>>> $buffer = $ENV{'QUERY_STRING'};
>>> }
>>> @pairs = split(/&/, $buffer);
>>> foreach $pair (@pairs)
>>> {
>>> ($name, $value) = split(/=/, $pair);
>>> $value =~ tr/+/ /;
>>> $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
>>> $in{$name} = $value;
>>> }
>>> }
>>> _______________________________________________
>>> Bioperl-l mailing list
>>> Bioperl-l at lists.open-bio.org
>>> http://lists.open-bio.org/mailman/listinfo/bioperl-l
>>>
>>>
>>>
>>
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/bioperl-l
>
>
More information about the Bioperl-l
mailing list