use strict; use Bio::SearchIO; use Bio::SimpleAlign; use Bio::Tools::Run::Alignment::TCoffee; use Bio::Graphics; use Bio::SeqFeature::Generic; use GD::Graph::lines; use GD::Graph::Data; print "opening hp-file..."; open (fi,'hp_list-test.txt') || die "cannot open hp-file"; my @hp_names=; close fi; my %hp; for (@hp_names) { shift; chomp; my @hp_name=split (/_/,$_); push(@{$hp{$hp_name[1]}},$hp_name[2]."_".$hp_name[5]."_".$hp_name[6]); } #for my $ww (sort keys %hp) {print @{$hp{$ww}},"ss"}; print "done!\n"; print "opening te-file..."; open (fw,"te1-test.fa") || die "cannot open te-file"; my @te_a=; close fw; print "done!\n"; my %te; for (my $i=0; $i<=$#te_a; $i+=2) { chomp $te_a[$i]; chomp $te_a[$i+1]; $te{$te_a[$i]}=$te_a[$i+1]; } #cheking if te is from hp and the following blasting; for my $te_name(sort {$a cmp $b} keys %te) { for my $te_hp_name(sort {$a cmp $b} keys %hp) { if ($te_name eq $te_hp_name) { open (fq,'>temp.in'); print fq '>',$te_name,"\n",$te{$te_name},"\n"; close fq; print "blasting of $te_name..."; print `c:\\blast\\bin\\blastall -p blastn -i temp.in -d e:\\sequences\\dm3\\db_for_blast\\dm3.blast.fa -e 1e-10 -o test1\\blast_$te_hp_name.txt` or die "cannot blast"; print "done!\n"; } } } #parcing for my $te_hp_name(sort keys %hp) { my %hash; print "parsing $te_hp_name..."; my $in = new Bio::SearchIO(-format => 'blast', -file => "test1\\blast_$te_hp_name.txt"); my @multi_hsp_fa; my $len; my $number_of_hsp; push @multi_hsp_fa, ('>',$te_hp_name,"\n",$te{$te_hp_name},"\n"); my $len=length $te{$te_hp_name}; # print "_",$te_hp_name,"-",$len,"\t"; while( my $result = $in->next_result ) { my $name_query=$result->query_name; my @score_average=(); while( my $hit = $result->next_hit ) { my $name_hit=$hit->name; while( my $hsp = $hit->next_hsp ) { if ($hsp ->length('hit')>=(0.97*$len)) { my $start_hsp=$hsp->start('hit'); my $end_hsp=$hsp->end('hit'); my $strand=(); if ($hsp->strand('hit') eq '1' ){ $strand = 1; } else {$strand = 2}; $number_of_hsp++; my @score_ext_hsp=extr_phastCons_score ($start_hsp,$end_hsp,$name_hit); foreach (my $j=0; $j<=$#score_ext_hsp; $j++) { chomp $score_ext_hsp[$j]; $hash{$j+1}+=$score_ext_hsp[$j]; } print `c:\\blast\\bin\\fastacmd -d e:\\sequences\\dm3\\db_for_blast\\dm3.blast.fa -p F -s "$name_hit" -L $start_hsp,$end_hsp -l 200 -S $strand > temp.fasta`; open (fz,'temp.fasta'); my @seq_hsp=; close fz; # for (@seq_hsp) { # s/\n//; # } push @multi_hsp_fa, (@seq_hsp); } } } } print "done!\n"; print "drawing $te_hp_name..."; #this is Y1 my @phast_score_average; foreach my $key (sort {$a<=>$b} keys %hash) { my $score_average=("%.3f",$hash{$key}/$number_of_hsp); push @phast_score_average, $score_average; } #aligning of hsp seqs to obtain a alignment open (fs,'>temp_align.in'); print fs @multi_hsp_fa; close fs; my $inputfilename='temp_align.in'; ## HERE THE PROBLEM; # my @align_fa = align($inputfilename); #this is Y2 # my @est_changes=est_ch(@align_fa); #draw my @X; for (my $t=1; $t<=$len; $t++) { push @X,$t; } # my @data=([@X],[@phast_score_average],[@est_changes]); my @data=([@X],[@phast_score_average]); my $panel = Bio::Graphics::Panel->new( -length => "$len", -width => 5000, -pad_left => 19, -pad_right => 10, # -pad_bottom => 100, ); my $full_length = Bio::SeqFeature::Generic->new(-start=>1,-end=>$len); $panel->add_track ( $full_length, -glyph => 'arrow', -tick => 2, -fgcolor => 'black', -double => 0, ); my $track = $panel->add_track( -glyph => 'generic', -label => 1 ); foreach my $hp_indiv (sort @{$hp{$te_hp_name}}) { my @hp_fetures=split (/_/,$hp_indiv); my $feature = Bio::SeqFeature::Generic->new( -display_name => $hp_fetures[0]."_".$hp_fetures[1]."_".$hp_fetures[2], -strand => $hp_fetures[0], -start => $hp_fetures[1], -end => $hp_fetures[2] ); $track->add_feature($feature); } my $graph = GD::Graph::lines->new(5020, 200); #length and height $graph->set( #x_label => 'nt', -t_margin => 100, # -r_margin => 10, # y_label => "PhastScore of $name", #title => 'An Area Graph', x_ticks => 0, y_max_value => 1.5, y_tick_number => 3, x_label_skip => 500, # x_label_position=>0, # no_axes=>0, box_axis=>0, accent_treshold => 1, transparent => 1, # x_tick_number=>auto, # dclrs=>[green], # correct_width=>1, dclrs => [qw(green blue)], ); my $gd = $graph->plot(\@data); my $combined = $panel->gd($gd); open(IMG, ">test1//$te_hp_name.png") or die $!; binmode IMG; print IMG $combined->png; print "done!\n"; } sub extr_phastCons_score { (my $start, my $end, my $name) = @_; my $len_extract = $end-$start+1; open (fr,">temp.phast"); print fr `sed -n '$start,$end\p' e:\\sequences\\dm3\\phastCons\\$name.pp`;# or die 'cannot execute sed'; close fr; open (fy,"temp.phast"); my @scores=; close fy; return @scores; } sub est_ch { my @seqs=@_; my %nt_cons=(); my $count_seqs=0; my %count_nt=(); my @change=(); my $cons_name=shift @seqs; my $cons_seq=shift @seqs; chomp $cons_seq; my @cons_nt=split (//,$cons_seq); for (my $i=0; $i<=$#cons_nt; $i++){ $nt_cons{$i}=$cons_nt[$i]; } for (my $j=0; $j<=$#seqs; $j+=2) { my @hsp_nt=split (//, $seqs[$j+1]); for (my $k=0; $k<=$#hsp_nt; $k++) { if ($hsp_nt[$k] eq $nt_cons{$k}) { $count_nt{$k}++; } } $count_seqs++; } for my $position (sort keys %count_nt) { my $change_pos=$count_nt{$position}/$count_seqs; push @change, $change_pos; } return @change; } sub align { my $file=shift @_; my @params = ('ktuple' => 2, 'matrix' => 'BLOSUM', 'outfile' => "temp_align.out",'output' => "fasta"); my $factory = Bio::Tools::Run::Alignment::TCoffee->new(@params); my $aln=$factory->align ($file); open (fy,'temp_align.out'); my @temp_file=; close fy; return @temp_file; }