# retrive sequence from Genbank use strict; use Bio::DB::GenBank; use Bio::SeqIO; my $gb = new Bio::DB::GenBank; open(OUT,">HCV-5UTR-2.txt")||die "Can't open the file!"; my $seqout = new Bio::SeqIO(-fh => \*OUT, -format => 'fasta'); my $query = Bio::DB::Query::GenBank->new (-query =>'Hepatitis C virus[Organism] AND genotype=1a and 5 UTR', -db => 'nucleotide'); my $seqio = $gb->get_Stream_by_query($query); my $i=0; while( defined (my $seq = $seqio->next_seq )) { $seqout->write_seq($seq); $i++; print "."; } print "$i \n"; # run multi sequence alignment use Bio::Tools::Run::Alignment::Clustalw; use Bio::SimpleAlign; use Bio::AlignIO::clustalw; my @params = ('ktuple' => 2, 'matrix' => 'BLOSUM','output'=>'mdf','outfile'=>'HCV-5UTR-2.aln'); my $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params); my $ktuple = 3; $factory->ktuple($ktuple); my $inputfilename = '/home/Tonny/perl-5.10.0/HCV-5UTR-2.txt'; my $aln = $factory->align($inputfilename); my $alignout->write_aln(my $aln); my $line; my @array=('A','C','G','T','-'); my @arrayX=('a','c','g','t','-'); my @arrayA; my @arrayB; my ($i,$j,$k); my (@a,@b); my %hash; open FILE, "/home/Tonny/perl-5.10.0/HCV-5UTR-2.aln" or die; open OUT, ">getAlignHCV-5 UTR-1a.txt" or die; open OUTA, ">getConsensusHCV-5 UTR-1a.txt" or die; while($line=){ chomp; if($line=~ /CLUSTAL/){ next; }elsif($line=~ /^[A-Za-z0-9]/){ @a=split /\s+/,$line; if(defined($hash{$a[0]})){ push @{$hash{$a[0]}}, $line; }else{ ${$hash{$a[0]}}[0]=$line; push @b,$a[0]; } } } for(my $k=0;$k<=$#{$hash{$a[0]}};$k++){ for(my $l=0;$l<=$#b;$l++){ print OUT ${$hash{$b[$l]}}[$k]; my $seq=(split /\s+/,${$hash{$b[$l]}}[$k])[1]; @arrayA=split //,$seq; for($i=0;$i<=$#arrayA;$i++){ if($arrayA[$i] =~ /A/i){ $arrayB[0][$i]++; $arrayB[5][$i]++; } if($arrayA[$i] =~ /C/i){ $arrayB[1][$i]++; $arrayB[5][$i]++; } if($arrayA[$i] =~ /G/i){ $arrayB[2][$i]++; $arrayB[5][$i]++; } if($arrayA[$i] =~ /T/i){ $arrayB[3][$i]++; $arrayB[5][$i]++; } if($arrayA[$i] =~ /\./i){ $arrayB[4][$i]++; $arrayB[5][$i]++; } if($arrayA[$i] =~ /\-/i){ $arrayB[4][$i]++; $arrayB[5][$i]++; } } } for($j=0;$j<=$#arrayA;$j++){ my $large=0;my $pos=0; for($i=0;$i<=4;$i++){ $arrayB[$i][$j]=$arrayB[$i][$j]/$arrayB[5][$j]; if($arrayB[$i][$j]>$large){ $large=$arrayB[$i][$j]; $pos=$i; } } if($large>0.5){ $arrayB[5][$j]=$large; $arrayB[6][$j]=$array[$pos]; }else{ $arrayB[5][$j]=$large; $arrayB[6][$j]='N'; } } for($i=0;$i<=5;$i++){ for($j=0;$j<=15;$j++){ print OUT " "; } for($j=0;$j<=$#arrayA;$j++){ if($arrayB[$i][$j]>0){ printf OUT "%.2f\t",$arrayB[$i][$j]; }else{ print OUT "0\t"; } } print OUT "\n"; } for($j=0;$j<=15;$j++){ print OUT " "; } for($j=0;$j<=$#arrayA;$j++){ print OUT "$arrayB[6][$j]\t"; } print OUT "\n"; for($j=0;$j<=15;$j++){ print OUTA " "; } for($j=0;$j<=$#arrayA;$j++){ print OUTA "$arrayB[6][$j]"; } print OUTA "\n"; @arrayB=undef; }