[Bioperl-l] bioperl modules
hanbobio
hanbobio at 126.com
Tue Nov 2 01:29:50 EDT 2010
Hi, all.
I ran the bioperl script on suse10, the perl version was 5.8.7 and bioperl version was 1.6 . But there were some mistake, the following was the message the linux system feelback:
liaoy at linux:~/nd-hn> perl nd-hn.pl
Can't locate Bio/Tools/Run/Alignment/Clustalw.pm in @INC (@INC contains: /usr/lib/perl5/5.8.7/i586-linux-thread-multi/usr/lib/perl5/5.8.7/usr/lib/perl5/site_perl/5.8.7/i586-linux-thread-multi/usr/lib/perl5/site_perl/5.8.7/usr/lib/perl5/site_perl/usr/lib/perl5/vendor_perl/5.8.7/i586-linux-thread-multi/usr/lib/perl5/vendor_perl/5.8.7 /usr/lib/perl5/vendor_perl .) at nd-hn.pl line 26.
BEGIN failed--compilation aborted at nd-hn.pl line 26.
The detail of this script in two ways: the attached file and the following script.
Would you be kindly to test my script and give me some modification or suggestion?
I myself tested the script some time ago, and my judge was that: the first part(retrieve sequences from remote genebank) and the third part(get the conservative sequences of the multi- sequences alignment outputfile) were right, and the second part(using the Clustalw to do the multi- sequences alignment) encountered some problem? What the problem is? How can I correct it?
Thank you very much for your advice.
Best regards
Yusheng Liao 2010-11-02
# retrive sequence from Genbank
use strict;
use Bio::DB::GenBank;
use Bio::SeqIO;
my $gb = new Bio::DB::GenBank;
open(OUT,">HCV-5UTR-2.txt")||die "Can't open the file!";
my $seqout = new Bio::SeqIO(-fh => \*OUT, -format => 'fasta');
my $query = Bio::DB::Query::GenBank->new
(-query =>'Hepatitis C virus[Organism] AND genotype=1a and 5 UTR',
-db => 'nucleotide');
my $seqio = $gb->get_Stream_by_query($query);
my $i=0;
while( defined (my $seq = $seqio->next_seq )) {
$seqout->write_seq($seq);
$i++;
print ".";
}
print "$i \n";
# run multi sequence alignment
use Bio::Tools::Run::Alignment::Clustalw;
use Bio::SimpleAlign;
use Bio::AlignIO::clustalw;
my @params = ('ktuple' => 2, 'matrix' => 'BLOSUM','output'=>'mdf','outfile'=>'HCV-5UTR-2.aln');
my $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
my $ktuple = 3;
$factory->ktuple($ktuple);
my $inputfilename = '/home/Tonny/perl-5.10.0/HCV-5UTR-2.txt';
my $aln = $factory->align($inputfilename);
my $alignout->write_aln(my $aln);
my $line;
my @array=('A','C','G','T','-');
my @arrayX=('a','c','g','t','-');
my @arrayA;
my @arrayB;
my ($i,$j,$k);
my (@a, at b);
my %hash;
open FILE, "/home/Tonny/perl-5.10.0/HCV-5UTR-2.aln" or die;
open OUT, ">getAlignHCV-5 UTR-1a.txt" or die;
open OUTA, ">getConsensusHCV-5 UTR-1a.txt" or die;
while($line=<FILE>){
chomp;
if($line=~ /CLUSTAL/){
next;
}elsif($line=~ /^[A-Za-z0-9]/){
@a=split /\s+/,$line;
if(defined($hash{$a[0]})){
push @{$hash{$a[0]}}, $line;
}else{
${$hash{$a[0]}}[0]=$line;
push @b,$a[0];
}
}
}
for(my $k=0;$k<=$#{$hash{$a[0]}};$k++){
for(my $l=0;$l<=$#b;$l++){
print OUT ${$hash{$b[$l]}}[$k];
my $seq=(split /\s+/,${$hash{$b[$l]}}[$k])[1];
@arrayA=split //,$seq;
for($i=0;$i<=$#arrayA;$i++){
if($arrayA[$i] =~ /A/i){
$arrayB[0][$i]++;
$arrayB[5][$i]++;
}
if($arrayA[$i] =~ /C/i){
$arrayB[1][$i]++;
$arrayB[5][$i]++;
}
if($arrayA[$i] =~ /G/i){
$arrayB[2][$i]++;
$arrayB[5][$i]++;
}
if($arrayA[$i] =~ /T/i){
$arrayB[3][$i]++;
$arrayB[5][$i]++;
}
if($arrayA[$i] =~ /\./i){
$arrayB[4][$i]++;
$arrayB[5][$i]++;
}
if($arrayA[$i] =~ /\-/i){
$arrayB[4][$i]++;
$arrayB[5][$i]++;
}
}
}
for($j=0;$j<=$#arrayA;$j++){
my $large=0;my $pos=0;
for($i=0;$i<=4;$i++){
$arrayB[$i][$j]=$arrayB[$i][$j]/$arrayB[5][$j];
if($arrayB[$i][$j]>$large){
$large=$arrayB[$i][$j];
$pos=$i;
}
}
if($large>0.5){
$arrayB[5][$j]=$large;
$arrayB[6][$j]=$array[$pos];
}else{
$arrayB[5][$j]=$large;
$arrayB[6][$j]='N';
}
}
for($i=0;$i<=5;$i++){
for($j=0;$j<=15;$j++){
print OUT " ";
}
for($j=0;$j<=$#arrayA;$j++){
if($arrayB[$i][$j]>0){
printf OUT "%.2f\t",$arrayB[$i][$j];
}else{
print OUT "0\t";
}
}
print OUT "\n";
}
for($j=0;$j<=15;$j++){
print OUT " ";
}
for($j=0;$j<=$#arrayA;$j++){
print OUT "$arrayB[6][$j]\t";
}
print OUT "\n";
for($j=0;$j<=15;$j++){
print OUTA " ";
}
for($j=0;$j<=$#arrayA;$j++){
print OUTA "$arrayB[6][$j]";
}
print OUTA "\n";
@arrayB=undef;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: Moduletest.pl.pl
Type: application/octet-stream
Size: 4525 bytes
Desc: not available
URL: <http://lists.open-bio.org/pipermail/bioperl-l/attachments/20101102/c720f038/attachment-0002.obj>
More information about the Bioperl-l
mailing list