[Bioperl-l] bioperl modules

Tue Nov 2 01:29:50 EDT 2010

Hi, all.
 I ran the bioperl script on suse10, the perl version was 5.8.7 and bioperl version was 1.6 .  But there were some mistake, the following was the message the linux system feelback:

liaoy at linux:~/nd-hn> perl nd-hn.pl

Can't locate Bio/Tools/Run/Alignment/Clustalw.pm in @INC (@INC contains: /usr/lib/perl5/5.8.7/i586-linux-thread-multi/usr/lib/perl5/5.8.7/usr/lib/perl5/site_perl/5.8.7/i586-linux-thread-multi/usr/lib/perl5/site_perl/5.8.7/usr/lib/perl5/site_perl/usr/lib/perl5/vendor_perl/5.8.7/i586-linux-thread-multi/usr/lib/perl5/vendor_perl/5.8.7 /usr/lib/perl5/vendor_perl .) at nd-hn.pl line 26.

BEGIN failed--compilation aborted at nd-hn.pl line 26.
 The detail of this script in two ways: the attached file and the following script.
Would you be kindly to test my script and give me some modification or suggestion?
 I myself tested the script some time ago, and my judge was that: the first part(retrieve sequences from remote genebank) and the third part(get the conservative sequences of the multi- sequences alignment outputfile) were right, and  the second part(using the Clustalw to do the multi- sequences alignment) encountered some problem? What the problem is? How can I correct it?
Thank you very much for your advice.

Best regards
Yusheng Liao 2010-11-02

# retrive sequence from Genbank
use strict;
use Bio::DB::GenBank;
use Bio::SeqIO;
my $gb = new Bio::DB::GenBank;
open(OUT,">HCV-5UTR-2.txt")||die "Can't open the file!";
my $seqout = new Bio::SeqIO(-fh => \*OUT, -format => 'fasta');
my $query = Bio::DB::Query::GenBank->new
    (-query   =>'Hepatitis C virus[Organism] AND genotype=1a and 5 UTR',     
   -db      => 'nucleotide');
my $seqio = $gb->get_Stream_by_query($query);
my $i=0;
while( defined (my $seq = $seqio->next_seq )) {
$seqout->write_seq($seq);
$i++;
print ".";
}
print "$i \n";
# run multi sequence alignment
use Bio::Tools::Run::Alignment::Clustalw;
use Bio::SimpleAlign;
use Bio::AlignIO::clustalw;
my @params = ('ktuple' => 2, 'matrix' => 'BLOSUM','output'=>'mdf','outfile'=>'HCV-5UTR-2.aln');
my $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
my $ktuple = 3;
$factory->ktuple($ktuple); 
my $inputfilename = '/home/Tonny/perl-5.10.0/HCV-5UTR-2.txt';
my $aln = $factory->align($inputfilename);
my $alignout->write_aln(my $aln);

my $line;
my @array=('A','C','G','T','-');
my @arrayX=('a','c','g','t','-');
my @arrayA;
my @arrayB;
my ($i,$j,$k);
my (@a, at b);
my %hash;
open FILE, "/home/Tonny/perl-5.10.0/HCV-5UTR-2.aln" or die;
open OUT, ">getAlignHCV-5 UTR-1a.txt" or die;
open OUTA, ">getConsensusHCV-5 UTR-1a.txt" or die;
while($line=<FILE>){
      chomp;
      if($line=~ /CLUSTAL/){
            next;
      }elsif($line=~ /^[A-Za-z0-9]/){
            @a=split /\s+/,$line;
            if(defined($hash{$a[0]})){
                  push @{$hash{$a[0]}}, $line;
            }else{
                  ${$hash{$a[0]}}[0]=$line;
                  push @b,$a[0];
            }
       }
}
for(my $k=0;$k<=$#{$hash{$a[0]}};$k++){    
      for(my $l=0;$l<=$#b;$l++){
            print OUT ${$hash{$b[$l]}}[$k];
            my $seq=(split /\s+/,${$hash{$b[$l]}}[$k])[1];
            @arrayA=split //,$seq;
            for($i=0;$i<=$#arrayA;$i++){
                  if($arrayA[$i] =~ /A/i){
                        $arrayB[0][$i]++;
                        $arrayB[5][$i]++;
                  }
                  if($arrayA[$i] =~ /C/i){
                        $arrayB[1][$i]++;
                        $arrayB[5][$i]++;
                  }
                  if($arrayA[$i] =~ /G/i){
                        $arrayB[2][$i]++;
                        $arrayB[5][$i]++;
                  }
                  if($arrayA[$i] =~ /T/i){
                        $arrayB[3][$i]++;
                        $arrayB[5][$i]++;
                  }
                  if($arrayA[$i] =~ /\./i){
                        $arrayB[4][$i]++;
                        $arrayB[5][$i]++;
                  }
                  if($arrayA[$i] =~ /\-/i){
                        $arrayB[4][$i]++;
                        $arrayB[5][$i]++;
                  }
            }
       }
       for($j=0;$j<=$#arrayA;$j++){
              my $large=0;my $pos=0;
              for($i=0;$i<=4;$i++){
                    $arrayB[$i][$j]=$arrayB[$i][$j]/$arrayB[5][$j];
                    if($arrayB[$i][$j]>$large){
                          $large=$arrayB[$i][$j];
                          $pos=$i;
                    }
              }
                    if($large>0.5){
                            $arrayB[5][$j]=$large;
                            $arrayB[6][$j]=$array[$pos];
                    }else{
                            $arrayB[5][$j]=$large;
                            $arrayB[6][$j]='N';
                    }
       }
       for($i=0;$i<=5;$i++){
              for($j=0;$j<=15;$j++){
                    print OUT " ";
              }
              for($j=0;$j<=$#arrayA;$j++){
                    if($arrayB[$i][$j]>0){
                           printf OUT "%.2f\t",$arrayB[$i][$j];
                    }else{
                           print OUT "0\t";
                    }
              }
              print OUT "\n";
        }
              for($j=0;$j<=15;$j++){
                    print OUT " ";
              }
              for($j=0;$j<=$#arrayA;$j++){
                      print OUT "$arrayB[6][$j]\t";
              }
              print OUT "\n";
              for($j=0;$j<=15;$j++){
                    print OUTA " ";
              }
              for($j=0;$j<=$#arrayA;$j++){
                      print OUTA "$arrayB[6][$j]";
              }
              print OUTA "\n";
        @arrayB=undef;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: Moduletest.pl.pl
Type: application/octet-stream
Size: 4525 bytes
Desc: not available
URL: <http://lists.open-bio.org/pipermail/bioperl-l/attachments/20101102/c720f038/attachment.obj>