From bugzilla-daemon at portal.open-bio.org Thu May 8 17:59:56 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Thu, 8 May 2008 17:59:56 -0400
Subject: [Bioperl-guts-l] [Bug 2485] Bio::SearchIO::Writer::HSPTableWriter -
'frame' column messes up the output
In-Reply-To:
Message-ID: <200805082159.m48LxupM000865@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2485
------- Comment #6 from jayoung at fhcrc.org 2008-05-08 17:59 EST -------
thank you Chris! (it took me a while to figure out where svn was on our
system, but once I got that going, the blast parsing looks fine)
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Fri May 9 13:40:47 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Fri, 9 May 2008 13:40:47 -0400
Subject: [Bioperl-guts-l] [Bug 2498] New: Add HSP sorting to Bio::Search::Hit
Message-ID:
http://bugzilla.open-bio.org/show_bug.cgi?id=2498
Summary: Add HSP sorting to Bio::Search::Hit
Product: BioPerl
Version: unspecified
Platform: All
OS/Version: All
Status: NEW
Severity: enhancement
Priority: P2
Component: Bio::Search/Bio::SearchIO
AssignedTo: bioperl-guts-l at bioperl.org
ReportedBy: prachi at stanford.edu
It would be very useful to have ordering options for HSPs within each Hit of a
BLAST report. It would be ideally added to Bio::Search::Hit objects. Refer to
post : http://bioperl.org/pipermail/bioperl-l/2008-May/027619.html
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Sat May 10 22:50:50 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Sat, 10 May 2008 22:50:50 -0400
Subject: [Bioperl-guts-l] [Bug 2492] Method "pi" in package
Bio::PopGen::Statistics
In-Reply-To:
Message-ID: <200805110250.m4B2ooRC011558@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2492
cjfields at uiuc.edu changed:
What |Removed |Added
----------------------------------------------------------------------------
Severity|critical |major
------- Comment #1 from cjfields at uiuc.edu 2008-05-10 22:50 EST -------
Reclassifying as this does not fit 'critical', but it does return erroneous
data. I'll try adding a TODO test to PopGen tests for this.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From jason at dev.open-bio.org Sun May 11 02:36:09 2008
From: jason at dev.open-bio.org (Jason Stajich)
Date: Sun, 11 May 2008 02:36:09 -0400
Subject: [Bioperl-guts-l] [14674] bioperl-live/trunk: bug 2492
Message-ID: <200805110636.m4B6a9YC022937@dev.open-bio.org>
Revision: 14674
Author: jason
Date: 2008-05-11 02:36:08 -0400 (Sun, 11 May 2008)
Log Message:
-----------
bug 2492
Modified Paths:
--------------
bioperl-live/trunk/Bio/PopGen/Statistics.pm
bioperl-live/trunk/t/PopGen.t
Modified: bioperl-live/trunk/Bio/PopGen/Statistics.pm
===================================================================
--- bioperl-live/trunk/Bio/PopGen/Statistics.pm 2008-04-25 19:51:04 UTC (rev 14673)
+++ bioperl-live/trunk/Bio/PopGen/Statistics.pm 2008-05-11 06:36:08 UTC (rev 14674)
@@ -674,7 +674,7 @@
sub pi {
my ($self,$individuals,$numsites) = @_;
- my (%data, at marker_names,$n);
+ my (%data,%marker_total, at marker_names,$n);
if( ref($individuals) =~ /ARRAY/i ) {
# one possible argument is an arrayref of Bio::PopGen::IndividualI objs
@@ -682,7 +682,6 @@
$n = scalar @$individuals;
# Here we are calculating the allele frequencies
- my %marker_total;
foreach my $ind ( @$individuals ) {
if( ! $ind->isa('Bio::PopGen::IndividualI') ) {
$self->warn("Expected an arrayref of Bio::PopGen::IndividualI objects, this is a ".ref($ind)."\n");
@@ -690,27 +689,32 @@
}
foreach my $m ( @marker_names ) {
foreach my $allele (map { $_->get_Alleles}
- $ind->get_Genotypes($m) ) {
+ $ind->get_Genotypes($m) ) {
$data{$m}->{$allele}++;
$marker_total{$m}++;
}
}
}
- while( my ($marker,$count) = each %marker_total ) {
- foreach my $c ( values %{$data{$marker}} ) {
- $c /= $count;
- }
- }
+# while( my ($marker,$count) = each %marker_total ) {
+# foreach my $c ( values %{$data{$marker}} ) {
+# $c /= $count;
+# }
+# }
# %data will contain allele frequencies for each marker, allele
- } elsif( ref($individuals) &&
+ } elsif( ref($individuals) &&
$individuals->isa('Bio::PopGen::PopulationI') ) {
my $pop = $individuals;
$n = $pop->get_number_individuals;
foreach my $marker( $pop->get_Markers ) {
push @marker_names, $marker->name;
- $data{$marker->name} = {$marker->get_Allele_Frequencies};
+ #$data{$marker->name} = {$marker->get_Allele_Frequencies};
+ my @genotypes = $pop->get_Genotypes(-marker => $marker->name);
+ for my $al ( map { $_->get_Alleles} @genotypes ) {
+ $data{$marker->name}->{$al}++;
+ $marker_total{$marker->name}++;
+ }
}
- } else {
+ } else {
$self->throw("expected an array reference of a list of Bio::PopGen::IndividualI to pi");
}
# doing all pairwise combinations
@@ -718,22 +722,22 @@
# For now we assume that all individuals have the same markers
my ($diffcount,$totalcompare) = (0,0);
my $pi = 0;
- foreach my $markerdat ( values %data ) {
- my $totalalleles; # this will only be different among markers
- # when there is missing data
- my @alleles = keys %$markerdat;
- foreach my $al ( @alleles ) { $totalalleles += $markerdat->{$al} }
- for( my $i =0; $i < scalar @alleles -1; $i++ ) {
- my ($a1,$a2) = ( $alleles[$i], $alleles[$i+1]);
- $pi += $self->heterozygosity($n,
- $markerdat->{$a1} / $totalalleles,
- $markerdat->{$a2} / $totalalleles);
+ while ( my ($marker,$markerdat) = each %data ) {
+ my $sampsize = $marker_total{$marker};
+ my $ssh = 0;
+ my @alleles = keys %$markerdat;
+ if ( $sampsize > 1 ) {
+ my $denom = $sampsize * ($sampsize - 1.0);
+ foreach my $al ( @alleles ) {
+ $ssh += ($markerdat->{$al} * ($markerdat->{$al} - 1)) / $denom;
}
+ $pi += 1.0 - $ssh;
+ }
}
$self->debug( "pi=$pi\n");
- if( $numsites ) {
+ if( $numsites ) {
return $pi / $numsites;
- } else {
+ } else {
return $pi;
}
}
@@ -764,7 +768,7 @@
#'
sub theta {
- my $self = shift;
+ my $self = shift;
my ( $n, $seg_sites,$totalsites) = @_;
if( ref($n) =~ /ARRAY/i ) {
my $samps = $n;
Modified: bioperl-live/trunk/t/PopGen.t
===================================================================
--- bioperl-live/trunk/t/PopGen.t 2008-04-25 19:51:04 UTC (rev 14673)
+++ bioperl-live/trunk/t/PopGen.t 2008-05-11 06:36:08 UTC (rev 14674)
@@ -10,7 +10,7 @@
use lib 't/lib';
use BioperlTest;
- test_begin(-tests => 98);
+ test_begin(-tests => 100);
use_ok('Bio::PopGen::Individual');
use_ok('Bio::PopGen::Genotype');
@@ -414,7 +414,7 @@
is($population[3]->get_Genotypes, 34);
$population = Bio::PopGen::Population->new(-individuals => \@population);
-is(sprintf("%.3f",$stats->pi($population)),12.335);
+is(sprintf("%.3f",$stats->pi($population)),12.266);
# if forced haploid population is called within pi
# need to decide about that...
# is(sprintf("%.3f",$stats->pi($population)),12.266);
@@ -422,8 +422,8 @@
is(sprintf("%.3f",$stats->theta($population)),5.548);
#TODO: {
# local $TODO = 'May be TJd inconsistency, need to recalculate';
- is(sprintf("%.3f",$stats->tajima_D($population)),'2.960');
- is(sprintf("%.3f",$stats->tajima_D($population->haploid_population)),3.486);
+ is(sprintf("%.3f",$stats->tajima_D($population)),'2.926');
+ is(sprintf("%.3f",$stats->tajima_D($population->haploid_population)),3.468);
#}
$io = Bio::PopGen::IO->new(-format => 'phase',
-file => test_input_file('example.phase'));
@@ -438,3 +438,17 @@
# test diploid data
+
+# bug 2492
+{
+ my $in = Bio::PopGen::IO->new(-format=>"csv", -fh=>\*DATA);
+ my $pop = $in->next_population;
+ is(sprintf("%.3f",$stats->pi($pop)),0.833,'Pi on 3-allele data');
+ is(sprintf("%.3f",$stats->theta($pop)),0.545,'Theta on 3-allele data');
+}
+__DATA__
+SAMPLE,Site-1
+seq_1,G
+seq_2,C
+seq_3,T
+seq_4,G
From jason at dev.open-bio.org Sun May 11 02:40:04 2008
From: jason at dev.open-bio.org (Jason Stajich)
Date: Sun, 11 May 2008 02:40:04 -0400
Subject: [Bioperl-guts-l] [14675]
bioperl-live/trunk/Bio/PopGen/Statistics.pm: bug #2492,
credit to K .Thornton's implementation
Message-ID: <200805110640.m4B6e4d3022970@dev.open-bio.org>
Revision: 14675
Author: jason
Date: 2008-05-11 02:40:04 -0400 (Sun, 11 May 2008)
Log Message:
-----------
bug #2492, credit to K.Thornton's implementation
Modified Paths:
--------------
bioperl-live/trunk/Bio/PopGen/Statistics.pm
Modified: bioperl-live/trunk/Bio/PopGen/Statistics.pm
===================================================================
--- bioperl-live/trunk/Bio/PopGen/Statistics.pm 2008-05-11 06:36:08 UTC (rev 14674)
+++ bioperl-live/trunk/Bio/PopGen/Statistics.pm 2008-05-11 06:40:04 UTC (rev 14675)
@@ -717,8 +717,9 @@
} else {
$self->throw("expected an array reference of a list of Bio::PopGen::IndividualI to pi");
}
- # doing all pairwise combinations
-
+ # based on Kevin Thornton's code:
+ # http://molpopgen.org/software/libsequence/doc/html/PolySNP_8cc-source.html#l00152
+
# For now we assume that all individuals have the same markers
my ($diffcount,$totalcompare) = (0,0);
my $pi = 0;
From bugzilla-daemon at portal.open-bio.org Sun May 11 02:40:47 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Sun, 11 May 2008 02:40:47 -0400
Subject: [Bioperl-guts-l] [Bug 2492] Method "pi" in package
Bio::PopGen::Statistics
In-Reply-To:
Message-ID: <200805110640.m4B6elqH020106@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2492
jason at bioperl.org changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution| |FIXED
------- Comment #2 from jason at bioperl.org 2008-05-11 02:40 EST -------
Thanks Zhen. I've recalculated out Pi is calculated based on Kevin Thornton's
Fixed it in SVN repository.
I should probably revisit the code and see about doing some re-implementation
based on Kevin's approaches.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From jason at dev.open-bio.org Sun May 11 02:41:28 2008
From: jason at dev.open-bio.org (Jason Stajich)
Date: Sun, 11 May 2008 02:41:28 -0400
Subject: [Bioperl-guts-l] [14676] bioperl-live/trunk/Bio/PopGen: code
simplification
Message-ID: <200805110641.m4B6fSct023002@dev.open-bio.org>
Revision: 14676
Author: jason
Date: 2008-05-11 02:41:28 -0400 (Sun, 11 May 2008)
Log Message:
-----------
code simplification
Modified Paths:
--------------
bioperl-live/trunk/Bio/PopGen/Marker.pm
bioperl-live/trunk/Bio/PopGen/Population.pm
bioperl-live/trunk/Bio/PopGen/Statistics.pm
Modified: bioperl-live/trunk/Bio/PopGen/Marker.pm
===================================================================
--- bioperl-live/trunk/Bio/PopGen/Marker.pm 2008-05-11 06:40:04 UTC (rev 14675)
+++ bioperl-live/trunk/Bio/PopGen/Marker.pm 2008-05-11 06:41:28 UTC (rev 14676)
@@ -215,7 +215,7 @@
sub get_Alleles{
my $self = shift;
my (@numeric, at alpha);
-
+
for ( keys %{$self->{'_allele_freqs'}} ) {
if( /[^\d\.\-e]/ ) { push @alpha, $_ }
else { push @numeric, $_ }
Modified: bioperl-live/trunk/Bio/PopGen/Population.pm
===================================================================
--- bioperl-live/trunk/Bio/PopGen/Population.pm 2008-05-11 06:40:04 UTC (rev 14675)
+++ bioperl-live/trunk/Bio/PopGen/Population.pm 2008-05-11 06:41:28 UTC (rev 14676)
@@ -400,13 +400,16 @@
} else {
my @genotypes = $self->get_Genotypes(-marker => $markername);
$marker = Bio::PopGen::Marker->new(-name => $markername);
-
+
if( ! @genotypes ) {
$self->warn("No genotypes for Marker $markername in the population");
} else {
my %alleles;
my $count;
- map { $count++; $alleles{$_}++ } map { $_->get_Alleles } @genotypes;
+ for my $al ( map { $_->get_Alleles} @genotypes ) {
+ $count++;
+ $alleles{$al}++
+ }
foreach my $allele ( keys %alleles ) {
$marker->add_Allele_Frequency($allele, $alleles{$allele}/$count);
}
Modified: bioperl-live/trunk/Bio/PopGen/Statistics.pm
===================================================================
--- bioperl-live/trunk/Bio/PopGen/Statistics.pm 2008-05-11 06:40:04 UTC (rev 14675)
+++ bioperl-live/trunk/Bio/PopGen/Statistics.pm 2008-05-11 06:41:28 UTC (rev 14676)
@@ -719,7 +719,6 @@
}
# based on Kevin Thornton's code:
# http://molpopgen.org/software/libsequence/doc/html/PolySNP_8cc-source.html#l00152
-
# For now we assume that all individuals have the same markers
my ($diffcount,$totalcompare) = (0,0);
my $pi = 0;
From jason at dev.open-bio.org Sun May 11 03:21:06 2008
From: jason at dev.open-bio.org (Jason Stajich)
Date: Sun, 11 May 2008 03:21:06 -0400
Subject: [Bioperl-guts-l] [14677] bioperl-live/trunk/Bio/Search/Hit: first
crack at a sort_hsps option in a Hit object, impemented in GenericHit object
Message-ID: <200805110721.m4B7L64D023102@dev.open-bio.org>
Revision: 14677
Author: jason
Date: 2008-05-11 03:21:06 -0400 (Sun, 11 May 2008)
Log Message:
-----------
first crack at a sort_hsps option in a Hit object, impemented in GenericHit object
Modified Paths:
--------------
bioperl-live/trunk/Bio/Search/Hit/GenericHit.pm
bioperl-live/trunk/Bio/Search/Hit/HitI.pm
Modified: bioperl-live/trunk/Bio/Search/Hit/GenericHit.pm
===================================================================
--- bioperl-live/trunk/Bio/Search/Hit/GenericHit.pm 2008-05-11 06:41:28 UTC (rev 14676)
+++ bioperl-live/trunk/Bio/Search/Hit/GenericHit.pm 2008-05-11 07:21:06 UTC (rev 14677)
@@ -1681,4 +1681,50 @@
return $self->{'_ncbi_gi'};
}
+
+# sort method for HSPs
+
+=head2 sort_hits
+
+ Title : sort_hsps
+ Usage : $result->sort_hsps(\&sort_function)
+ Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
+ by descending score.
+ Returns : n/a
+ Args : A coderef for the sort function. See the documentation on the Perl sort()
+ function for guidelines on writing sort functions.
+ Note : To access the special variables $a and $b used by the Perl sort() function
+ the user function must access Bio::Search::Hit::HitI namespace.
+ For example, use :
+ $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
+ $Bio::Search::Result::HitI::b->length});
+ NOT $hit->sort_hsps($a->length <=> $b->length);
+
+=cut
+
+sub sort_hsps {
+ my ($self, $coderef) = @_;
+ my @sorted_hsps;
+
+ if ($coderef) {
+ $self->throw('sort_hsps requires a sort function passed as a subroutine reference')
+ unless (ref($coderef) eq 'CODE');
+ }
+ else {
+ $coderef = \&_default_sort_hsps;
+ # throw a warning?
+ }
+
+ my @hsps = $self->hsps();
+ eval {@sorted_hsps = sort $coderef @hsps };
+
+ if ($@) {
+ $self->throw("Unable to sort hsps: $@");
+ }
+ else {
+ $self->{'_hsps'} = \@sorted_hsps;
+ 1;
+ }
+}
+
1;
Modified: bioperl-live/trunk/Bio/Search/Hit/HitI.pm
===================================================================
--- bioperl-live/trunk/Bio/Search/Hit/HitI.pm 2008-05-11 06:41:28 UTC (rev 14676)
+++ bioperl-live/trunk/Bio/Search/Hit/HitI.pm 2008-05-11 07:21:06 UTC (rev 14677)
@@ -676,6 +676,45 @@
# aliasing for Steve's method names
sub hit_length { shift->length(@_) }
+
+# sort method for HSPs
+
+=head2 sort_hits
+
+ Title : sort_hsps
+ Usage : $result->sort_hsps(\&sort_function)
+ Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
+ by descending score.
+ Returns : n/a
+ Args : A coderef for the sort function. See the documentation on the Perl sort()
+ function for guidelines on writing sort functions.
+ Note : To access the special variables $a and $b used by the Perl sort() function
+ the user function must access Bio::Search::Hit::HitI namespace.
+ For example, use :
+ $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
+ $Bio::Search::Result::HitI::b->length});
+ NOT $hit->sort_hsps($a->length <=> $b->length);
+
+=cut
+
+sub sort_hsps {shift->throw_not_implemented }
+
+=head2 _default sort_hsps
+
+ Title : _default_sort_hsps
+ Usage : Do not call directly.
+ Function : Sort hsps in ascending order by evalue
+ Args : None
+ Returns: 1 on success
+ Note : Used by $hit->sort_hsps()
+
+=cut
+
+sub _default_sort_hsps {
+ $Bio::Search::Hit::HitI::a->evalue <=>
+ $Bio::Search::Hit::HitI::a->evalue;
+}
+
1;
From bugzilla-daemon at portal.open-bio.org Sun May 11 03:57:30 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Sun, 11 May 2008 03:57:30 -0400
Subject: [Bioperl-guts-l] [Bug 2492] Method "pi" in package
Bio::PopGen::Statistics
In-Reply-To:
Message-ID: <200805110757.m4B7vUOF022920@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2492
------- Comment #3 from zwang01 at sibs.ac.cn 2008-05-11 03:57 EST -------
The code used to calculate 'pi' is:
sub pi {
...
for( my $i =0; $i < scalar @alleles -1; $i++ ) {
my ($a1,$a2) = ( $alleles[$i], $alleles[$i+1]);
$pi += $self->heterozygosity($n,
$markerdat->{$a1} / $totalalleles,
$markerdat->{$a2} / $totalalleles);
}
...
}
sub heterozygosity {
my ($self,$samp_size, $freq1,$freq2) = @_;
if( ! $freq2 ) { $freq2 = 1 - $freq1 }
if( $freq1 > 1 || $freq2 > 1 ) {
$self->warn("heterozygosity expects frequencies to be less than 1");
}
my $sum = ($freq1**2) + (($freq2)**2);
my $h = ( $samp_size*(1- $sum) ) / ($samp_size - 1) ;
return $h;
}
If there are two alleles with frequency x1 and x2, respectively, pi can be
calculated as 1-x1*x1-x2*x2 and the code will give the correct value. However,
if there are three alleles with frequency x1, x2 and x3, pi would be calculated
as (1-x1*x1-x2*x2) + (1-x2*x2+x3*x3), which is wrong as the correct one should
be 1-x1*x1-x2*x2-x3*x3.
In my opinion, the above code can be corrected as:
sub pi {
...
for( my $i =0; $i < scalar @alleles; $i++ ) {
for (my $j = $i + 1; $j < scalar @alleles; $j++) {
my ($a1,$a2) = ( $alleles[$i], $alleles[$j]);
$pi += $self->heterozygosity($n,
$markerdat->{$a1} / $totalalleles,
$markerdat->{$a2} / $totalalleles);
}
}
...
}
sub heterozygosity {
my ($self,$samp_size, $freq1,$freq2) = @_;
if( ! $freq2 ) { $freq2 = 1 - $freq1 }
if( $freq1 > 1 || $freq2 > 1 ) {
$self->warn("heterozygosity expects frequencies to be less than 1");
}
my $h = 2 * $freq1 * $freq2 * $samp_size / ($samp_size - 1);
return $h;
}
For two alleles case, pi = 2*x1*x2; and for three alleles case, pi = 2*x1*x2 +
2*x1*x3 + 2*x2*x3. I believe they are both right.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Sun May 11 17:17:39 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Sun, 11 May 2008 17:17:39 -0400
Subject: [Bioperl-guts-l] [Bug 2498] Add HSP sorting to Bio::Search::Hit
In-Reply-To:
Message-ID: <200805112117.m4BLHdMG013519@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2498
------- Comment #1 from cjfields at uiuc.edu 2008-05-11 17:17 EST -------
Looks like Jason committed a preliminary version of a sort_hsps() method to
GenericHit in Subversion. You can take a look at it to see if it helps.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Mon May 12 11:35:05 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Mon, 12 May 2008 11:35:05 -0400
Subject: [Bioperl-guts-l] [Bug 2498] Add HSP sorting to Bio::Search::Hit
In-Reply-To:
Message-ID: <200805121535.m4CFZ55k013898@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2498
------- Comment #2 from jason at bioperl.org 2008-05-12 11:35 EST -------
Will need to write some example code to use it, but basically you can set the
sort_hsps function on the Hit object --- need to think if there needs to be a
pass-through from Result object to set the sort function.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Mon May 12 11:36:47 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Mon, 12 May 2008 11:36:47 -0400
Subject: [Bioperl-guts-l] [Bug 2492] Method "pi" in package
Bio::PopGen::Statistics
In-Reply-To:
Message-ID: <200805121536.m4CFal1Y014003@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2492
jason at bioperl.org changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|RESOLVED |REOPENED
Resolution|FIXED |
------- Comment #4 from jason at bioperl.org 2008-05-12 11:36 EST -------
Thanks, maybe will just fix the heterozyg calc rather than then other counting
method that I applied in pi since those results are used in other methods too.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Mon May 12 11:38:22 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Mon, 12 May 2008 11:38:22 -0400
Subject: [Bioperl-guts-l] [Bug 2439] multiple results HTMLResultWriter.pm
and non-redundant entries in SearchIO
In-Reply-To:
Message-ID: <200805121538.m4CFcMTX014100@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2439
------- Comment #5 from jason at bioperl.org 2008-05-12 11:38 EST -------
It was never my intention to have > 1 report in an HTML file, but we can try
and unroll part of that assumption looks like Chris has gotten that going.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From dave_messina at dev.open-bio.org Wed May 21 17:04:12 2008
From: dave_messina at dev.open-bio.org (Dave Messina)
Date: Wed, 21 May 2008 17:04:12 -0400
Subject: [Bioperl-guts-l] [14678] bioperl-live/trunk: Added support for
WU-BLAST tabular output.
Message-ID: <200805212104.m4LL4C6p011305@dev.open-bio.org>
Revision: 14678
Author: dave_messina
Date: 2008-05-21 17:04:11 -0400 (Wed, 21 May 2008)
Log Message:
-----------
Added support for WU-BLAST tabular output. Tests added to SearchIO.t and all tests pass.
Modified Paths:
--------------
bioperl-live/trunk/Bio/SearchIO/blasttable.pm
bioperl-live/trunk/t/SearchIO.t
Added Paths:
-----------
bioperl-live/trunk/t/data/test1.blasttab3
bioperl-live/trunk/t/data/test1.wublastp
Modified: bioperl-live/trunk/Bio/SearchIO/blasttable.pm
===================================================================
--- bioperl-live/trunk/Bio/SearchIO/blasttable.pm 2008-05-11 07:21:06 UTC (rev 14677)
+++ bioperl-live/trunk/Bio/SearchIO/blasttable.pm 2008-05-21 21:04:11 UTC (rev 14678)
@@ -26,7 +26,8 @@
=head1 DESCRIPTION
-This module will support parsing NCBI -m 8 or -m 9 tabular output.
+This module will support parsing NCBI -m 8 or -m 9 tabular output
+and WU-BLAST -mformat 2 or -mformat 3 tabular output.
=head1 FEEDBACK
@@ -161,15 +162,47 @@
local $_;
my ($alg, $ver);
while( defined ($_ = $self->_readline) ) {
- # -m 9 only
- if(m{^#\s+((?:\S+?)?BLAST[NPX])\s+(.+)}) {
+ # WU-BLAST -mformat 3 only
+ if(m{^#\s((?:\S+?)?BLAST[NPX])\s(\d+\.\d+.+\d{4}\])}) {
($alg, $ver) = ($1, $2);
+ # only one header for whole file with WU-BLAST
+ # so $alg and $ver won't get set properly for
+ # each result
+ $self->program_name($alg) if $alg;
+ $self->element({'Name' => 'Result_version',
+ 'Data' => $ver}) if $ver;
next;
+ }
+ # -m 9 only
+ elsif(m{^#\s+((?:\S+?)?BLAST[NPX])\s+(.+)}) {
+ ($alg, $ver) = ($1, $2);
+ next;
}
next if /^\#/ || /^\s+$/;
- my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
- $qstart,$qend,$hstart,$hend,$evalue,$bits) = split;
-
+
+ my @fields = split;
+ my ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
+ $qstart,$qend,$hstart,$hend,$evalue,$bits);
+ # WU-BLAST-specific
+ my ($num_scores, $raw_score, $identities, $positives, $percent_pos,
+ $qgap_blocks,$qgaps, $sgap_blocks, $sgaps, $qframe,
+ $sframe);
+ # NCBI -m8 and -m9
+ if (@fields == 12) {
+ ($qname,$hname, $percent_id, $hsp_len, $mismatches,$gapsm,
+ $qstart,$qend,$hstart,$hend,$evalue,$bits) = @fields;
+ }
+ # WU-BLAST -mformat 2 and 3
+ elsif ((@fields == 22) or (@fields == 24)) {
+ ($qname,$hname,$evalue,$num_scores, $bits, $raw_score, $hsp_len,
+ $identities, $positives,$mismatches, $percent_id, $percent_pos,
+ $qgap_blocks, $qgaps, $sgap_blocks, $sgaps, $qframe, $qstart,
+ $qend, $sframe, $hstart,$hend,) = @fields;
+ # we need total gaps in the alignment
+ $gapsm=$qgaps+$sgaps;
+ }
+ else {}
+
# Remember Jim's code is 0 based
if( defined $lastquery &&
$lastquery ne $qname ) {
Modified: bioperl-live/trunk/t/SearchIO.t
===================================================================
--- bioperl-live/trunk/t/SearchIO.t 2008-05-11 07:21:06 UTC (rev 14677)
+++ bioperl-live/trunk/t/SearchIO.t 2008-05-21 21:04:11 UTC (rev 14678)
@@ -7,7 +7,7 @@
use lib 't/lib';
use BioperlTest;
- test_begin(-tests => 1712);
+ test_begin(-tests => 1782);
use_ok('Bio::SearchIO');
use_ok('Bio::SearchIO::Writer::HitTableWriter');
@@ -2055,6 +2055,23 @@
is($tester{$key}, $ref{$key},$key);
}
+# test WU-BLAST blasttable output
+$searchio = Bio::SearchIO->new(-file => test_input_file('test1.wublastp'),
+ -format => 'blast');
+$result = $searchio->next_result;
+isa_ok($result,'Bio::Search::Result::ResultI');
+my %wuref = &result2hash($result);
+is( scalar keys %wuref, 31);
+$searchio = Bio::SearchIO->new(-file => test_input_file('test1.blasttab3'),
+ -program_name => 'BLASTP',
+ -format => 'blasttable');
+$result = $searchio->next_result;
+my %wutester = &result2hash($result);
+is( scalar keys %wutester, 31);
+foreach my $key ( sort keys %ref ) {
+ is($wutester{$key}, $wuref{$key},$key);
+}
+
# Test Blast parsing with B=0 (WU-BLAST)
$searchio = Bio::SearchIO->new(-file => test_input_file('no_hsps.blastp'),
-format => 'blast');
Added: bioperl-live/trunk/t/data/test1.blasttab3
===================================================================
--- bioperl-live/trunk/t/data/test1.blasttab3 (rev 0)
+++ bioperl-live/trunk/t/data/test1.blasttab3 2008-05-21 21:04:11 UTC (rev 14678)
@@ -0,0 +1,21 @@
+# BLASTP 2.0MP-WashU [04-May-2006] [linux26-i686-ILP32F64 2006-05-09T11:47:08]
+# Start: 3:11:00 PM CEST May 21, 2008
+# Parameters:
+# topcomboN=5
+# links
+# E=0.0001
+# mformat=3
+# V=5
+# B=5
+# Database: test_orfs.fa
+# Title: test_orfs.fa
+# Posted: 11:50:04 AM CEST May 14, 2008
+# Created: 11:50:04 AM CEST May 14, 2008
+# Letters: 9460
+# Records: 307
+# Query: 000086_1606_1131
+# Fields: qid sid E N Sprime S alignlen nident npos nmism pcident pcpos qgaps qgaplen sgaps sgaplen qframe qstart qend sframe sstart send group links
+000086_1606_1131 000086_1606_1131 1.7e-11 1 50.12 128 21 21 21 0 100.00 100.00 0 0 0 0 +0 1 21 +0 1 21 1 (1)
+000086_1606_1131 000269_0423_3271 5.4e-06 1 31.81 76 22 17 18 1 77.27 81.82 2 2 2 2 +0 1 20 +0 2 21 1 (1)
+000086_1606_1131 000780_0082_2331 6.9e-06 1 31.46 75 23 17 19 2 73.91 82.61 3 3 1 1 +0 1 20 +0 2 23 1 (1)
+# EXIT: [000086_1606_1131]: 0
Property changes on: bioperl-live/trunk/t/data/test1.blasttab3
___________________________________________________________________
Name: svn:keywords
+ "Author Date Id Rev URL"
Added: bioperl-live/trunk/t/data/test1.wublastp
===================================================================
--- bioperl-live/trunk/t/data/test1.wublastp (rev 0)
+++ bioperl-live/trunk/t/data/test1.wublastp 2008-05-21 21:04:11 UTC (rev 14678)
@@ -0,0 +1,97 @@
+BLASTP 2.0MP-WashU [04-May-2006] [linux26-i686-ILP32F64 2006-05-09T11:47:08]
+
+Copyright (C) 1996-2006 Washington University, Saint Louis, Missouri USA.
+All Rights Reserved.
+
+Reference: Gish, W. (1996-2006) http://blast.wustl.edu
+
+Query= 000086_1606_1131 this is your fake desc
+ (21 letters)
+
+Database: test_orfs.fa
+ 307 sequences; 9460 total letters.
+Searching....10....20....30....40....50....60....70....80....90....100% done
+
+ Smallest
+ Sum
+ High Probability
+Sequences producing High-scoring Segment Pairs: Score P(N) N
+
+000086_1606_1131 [64 - 2] (REVERSE SENSE) length=64 uaccn... 128 1.7e-11 1
+000269_0423_3271 [1 - 78] length=78 uaccno=ER8QEOW01BBHO5 76 5.4e-06 1
+000780_0082_2331 [1 - 78] length=78 uaccno=ER8QEOW01AHI8T 75 6.9e-06 1
+
+
+>000086_1606_1131 [64 - 2] (REVERSE SENSE) length=64 uaccno=ER8QEOW01D7AWT
+ Length = 21
+
+ Score = 128 (50.1 bits), Expect = 1.7e-11, P = 1.7e-11, Group = 1
+ Identities = 21/21 (100%), Positives = 21/21 (100%)
+ Links = (1)
+
+Query: 1 THITHTYTHITHTSHTHHTHR 21
+ THITHTYTHITHTSHTHHTHR
+Sbjct: 1 THITHTYTHITHTSHTHHTHR 21
+
+
+>000269_0423_3271 [1 - 78] length=78 uaccno=ER8QEOW01BBHO5
+ Length = 26
+
+ Score = 76 (31.8 bits), Expect = 5.4e-06, P = 5.4e-06, Group = 1
+ Identities = 17/22 (77%), Positives = 18/22 (81%)
+ Links = (1)
+
+Query: 1 THITHTYTHITHT-SHTH-HTH 20
+ TH THTYTH THT +HTH HTH
+Sbjct: 2 TH-THTYTH-THTQTHTHAHTH 21
+
+
+>000780_0082_2331 [1 - 78] length=78 uaccno=ER8QEOW01AHI8T
+ Length = 26
+
+ Score = 75 (31.5 bits), Expect = 6.9e-06, P = 6.9e-06, Group = 1
+ Identities = 17/23 (73%), Positives = 19/23 (82%)
+ Links = (1)
+
+Query: 1 THI-THTYTHITHT-SHTH-HTH 20
+ THI THT+TH THT +HTH HTH
+Sbjct: 2 THIHTHTHTH-THTHTHTHTHTH 23
+
+
+Parameters:
+ B=5
+ V=5
+ E=0.0001
+ links
+ topcomboN=5
+
+ ctxfactor=1.00
+
+ Query ----- As Used ----- ----- Computed ----
+ Frame MatID Matrix name Lambda K H Lambda K H
+ +0 0 BLOSUM62 0.324 0.122 0.430 same same same
+ Q=9,R=2 0.244 0.0300 0.180 n/a n/a n/a
+
+ Query
+ Frame MatID Length Eff.Length E S W T X E2 S2
+ +0 0 21 21 9.7e-05 71 3 11 22 0.40 21
+ 29 0.44 22
+
+
+Statistics:
+
+ Database: test_orfs.fa
+ Title: test_orfs.fa
+ Posted: 11:50:04 AM CEST May 14, 2008
+ Created: 11:50:04 AM CEST May 14, 2008
+ Format: XDF-1
+ # of letters in database: 9460
+ # of sequences in database: 307
+ # of database sequences satisfying E: 3
+ No. of states in DFA: 208 (22 KB)
+ Total size of DFA: 39 KB (2056 KB)
+ Time to generate neighborhood: 0.00u 0.00s 0.00t Elapsed: 00:00:00
+ No. of threads or processors used: 2
+ Search cpu time: 0.00u 0.02s 0.02t Elapsed: 00:00:00
+ Total cpu time: 0.00u 0.02s 0.02t Elapsed: 00:00:00
+ Start: Wed May 21 22:50:32 2008 End: Wed May 21 22:50:32 2008
Property changes on: bioperl-live/trunk/t/data/test1.wublastp
___________________________________________________________________
Name: svn:keywords
+ "Author Date Id Rev URL"
From faga at dev.open-bio.org Fri May 23 10:35:08 2008
From: faga at dev.open-bio.org (Ben faga)
Date: Fri, 23 May 2008 10:35:08 -0400
Subject: [Bioperl-guts-l] [14679]
bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm: Now,
the loader passes unrecognized meta tags to the store object (if it can).
Message-ID: <200805231435.m4NEZ81T019957@dev.open-bio.org>
Revision: 14679
Author: faga
Date: 2008-05-23 10:35:07 -0400 (Fri, 23 May 2008)
Log Message:
-----------
Now, the loader passes unrecognized meta tags to the store object (if it can). Also, it handles the ### pragma.
Modified Paths:
--------------
bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-05-21 21:04:11 UTC (rev 14678)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-05-23 14:35:07 UTC (rev 14679)
@@ -389,6 +389,15 @@
my $self = shift;
my $instruction = shift;
+ if ( $instruction =~ /^#$/ ) {
+ $self->store_current_feature() ; # during fast loading, we will have a feature left at the very end
+ $self->start_or_finish_sequence(); # finish any half-loaded sequences
+ if ( $self->store->can('handle_resolution_meta') ) {
+ $self->store->handle_resolution_meta($instruction);
+ }
+ return;
+ }
+
if ($instruction =~ /sequence-region\s+(.+)\s+(-?\d+)\s+(-?\d+)/i) {
my($ref,$start,$end,$strand) = $self->_remap($1,$2,$3,+1);
my $feature = $self->sfclass->new(-name => $ref,
@@ -406,6 +415,11 @@
$self->store->index_subfeatures($1);
return;
}
+
+ if ( $self->store->can('handle_unrecognized_meta') ) {
+ $self->store->handle_unrecognized_meta($instruction);
+ return;
+ }
}
=item handle_feature
From heikki at dev.open-bio.org Tue May 27 02:27:18 2008
From: heikki at dev.open-bio.org (Heikki Lehvaslaiho)
Date: Tue, 27 May 2008 02:27:18 -0400
Subject: [Bioperl-guts-l] [14680]
bioperl-run/trunk/Bio/Tools/Run/Phylo/QuickTree.pm: fix typo in docs
Message-ID: <200805270627.m4R6RIso009831@dev.open-bio.org>
Revision: 14680
Author: heikki
Date: 2008-05-27 02:27:18 -0400 (Tue, 27 May 2008)
Log Message:
-----------
fix typo in docs
Modified Paths:
--------------
bioperl-run/trunk/Bio/Tools/Run/Phylo/QuickTree.pm
Modified: bioperl-run/trunk/Bio/Tools/Run/Phylo/QuickTree.pm
===================================================================
--- bioperl-run/trunk/Bio/Tools/Run/Phylo/QuickTree.pm 2008-05-23 14:35:07 UTC (rev 14679)
+++ bioperl-run/trunk/Bio/Tools/Run/Phylo/QuickTree.pm 2008-05-27 06:27:18 UTC (rev 14680)
@@ -136,7 +136,7 @@
-upgma => boolean # Use the UPGMA method to construct the tree [0]
-kimura => boolean # Use the kimura translation for pairwise
# distances [0]
- -boot => int # Calcuate bootstrap values with n iterations [0]
+ -boot => int # Calculate bootstrap values with n iterations [0]
=cut
@@ -204,7 +204,7 @@
Title : boot
Usage : $factory->boot(100);
- Function: Choose to calcuate bootstrap values with the supplied number of
+ Function: Choose to calculate bootstrap values with the supplied number of
iterations.
Returns : int (default 0)
Args : None to get, int to set.
From heikki at dev.open-bio.org Tue May 27 06:15:49 2008
From: heikki at dev.open-bio.org (Heikki Lehvaslaiho)
Date: Tue, 27 May 2008 06:15:49 -0400
Subject: [Bioperl-guts-l] [14681] bioperl-run/trunk: wrapper around phyml
Message-ID: <200805271015.m4RAFn3P010597@dev.open-bio.org>
Revision: 14681
Author: heikki
Date: 2008-05-27 06:15:48 -0400 (Tue, 27 May 2008)
Log Message:
-----------
wrapper around phyml
Added Paths:
-----------
bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm
bioperl-run/trunk/t/Phyml.t
bioperl-run/trunk/t/data/dna_seqs1.phy
Added: bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm
===================================================================
--- bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm (rev 0)
+++ bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-05-27 10:15:48 UTC (rev 14681)
@@ -0,0 +1,655 @@
+# $Id: QuickTree.pm 13928 2007-06-14 15:23:09Z sendu $
+#
+# BioPerl module for Bio::Tools::Run::Phylo::Phyml
+#
+# Cared for by Heikki Lehvaslaiho
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::Phylo::Phyml - Wrapper for rapid reconstruction of phylogenies using Phyml
+
+=head1 SYNOPSIS
+
+ use Bio::Tools::Run::Phylo::Phyml;
+
+ # Make a Phyml factory
+ $factory = Bio::Tools::Run::Phylo::Phyml->new(-verbose => 2);
+ # it defaults to protein alignment
+ # change parameters
+ $factory->model('Dayhoff');
+ # Pass the factory an alignment and run
+ $inputfilename = 't/data/protpars.phy';
+ $tree = $factory->run($inputfilename); # $tree is a Bio::Tree::Tree object.
+
+
+ # or set parameters at object creation
+ my %args = (
+ -data_type => 'dna',
+ -model => 'HKY',
+ -kappa => 4,
+ -invar => 'e',
+ -category_number => 4,
+ -alpha => 'e',
+ -tree => 'BIONJ',
+ -opt_topology => '0',
+ -opt_lengths => '1',
+ );
+ $factory = Bio::Tools::Run::Phylo::Phyml->new(%args);
+ # and get a Bio::Align::AlignI (SimpleAlign) object from somewhere
+ $tree = $factory->run($aln);
+
+=head1 DESCRIPTION
+
+This is a wrapper for running the phyml application by St?phane
+Guindon and Olivier Gascuel. You can download it from:
+http://atgc.lirmm.fr/phyml/
+
+=head2 Installing
+
+After downloading, you need to rename a the copy of the program that
+runs under your operating system. I.e. C into C.
+
+You will need to help this Phyml wrapper to find the C program.
+This can be done in (at least) three ways:
+
+=over
+
+=item 1.
+
+Make sure the Phyml executable is in your path. Copy it to, or create
+a symbolic link from a directory that is in your path.
+
+=item 2.
+
+Define an environmental variable PHYMLDIR which is a
+directory which contains the 'phyml' application: In bash:
+
+ export PHYMLDIR=/home/username/phyml_v2.4.4/exe
+
+In csh/tcsh:
+
+ setenv PHYMLDIR /home/username/phyml_v2.4.4/exe
+
+=item 3.
+
+Include a definition of an environmental variable PHYMLDIR in
+every script that will use this Phyml wrapper module, e.g.:
+
+ BEGIN { $ENV{PHYMLDIR} = '/home/username/phyml_v2.4.4/exe' }
+ use Bio::Tools::Run::Phylo::Phyml;
+
+=back
+
+=head2 Running
+
+This wrapper has been tested with PHYML v2.4.4.
+
+In its current state, the wrapper supports only input of one MSA and
+output of one tree. It can easily be extended to support more advanced
+capabilities of C.
+
+Two convienience methods have been added on top of the standard
+BioPerl WrapperBase ones: stats() and tree_string(). You can call them
+to after running the phyml program to retrieve into a string the statistics
+and the tree in Newick format.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+ http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+heikki at bioperl dot org
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+package Bio::Tools::Run::Phylo::Phyml;
+use strict;
+
+use Bio::AlignIO;
+use File::Copy;
+use File::Spec;
+
+use Bio::TreeIO;
+
+use base qw(Bio::Tools::Run::WrapperBase);
+
+our $PROGRAM_NAME = 'phyml';
+our $PROGRAM_DIR = $ENV{'PHYMLDIR'};
+
+
+
+# valid substitution model names
+our $models;
+# DNA
+map { $models->{0}->{$_} = 1 } qw(JC69 K2P F81 HKY F84 TN93 GTR);
+# protein
+map { $models->{1}->{$_} = 1 } qw(JTT MtREV Dayhoff WAG);
+
+
+=head2 program_name
+
+ Title : program_name
+ Usage : $factory>program_name()
+ Function: holds the program name
+ Returns : string
+ Args : None
+
+=cut
+
+sub program_name {
+ return $PROGRAM_NAME;
+}
+
+=head2 program_dir
+
+ Title : program_dir
+ Usage : $factory->program_dir(@params)
+ Function: returns the program directory, obtiained from ENV variable.
+ Returns : string
+ Args : None
+
+=cut
+
+sub program_dir {
+ return $PROGRAM_DIR;
+}
+
+=head2 new
+
+ Title : new
+ Usage : $factory = Bio::Tools::Run::Phylo::Phyml->new(@params)
+ Function: creates a new Phyml factory
+ Returns : Bio::Tools::Run::Phylo::Phyml
+ Args : Optionally, provide any of the following (default in []):
+ -data_type => 'dna' or 'protein', [protein]
+ -dataset_count => 'integer, [1]
+ -model => 'HKY'... , [HKY|JTT]
+ -kappa => 'e' or float, [e]
+ -invar => 'e' or float, [e]
+ -category_number => integer, [1]
+ -alpha => 'e' or float, [e]
+ -tree => 'BIONJ' or your own, [BION]
+ -opt_topology => boolean [y]
+ -opt_lengths => boolean [y]
+
+=cut
+
+sub new {
+ my ($class, @args) = @_;
+ my $self = $class->SUPER::new(@args);
+
+ # for consistency with other run modules, allow params to be dashless
+ my %args = @args;
+ while (my ($key, $val) = each %args) {
+ if ($key !~ /^-/) {
+ delete $args{$key};
+ $args{'-'.$key} = $val;
+ }
+ }
+
+ my ($data_type, $dataset_count, $model, $kappa, $invar,
+ $category_number, $alpha, $tree, $opt_topology,
+ $opt_lengths) = $self->_rearrange([qw( DATA_TYPE
+ DATASET_COUNT
+ MODEL
+ KAPPA
+ INVAR
+ CATEGORY_NUMBER
+ ALPHA
+ TREE
+ OPT_TOPOLOGY
+ OPT_LENGTHS)], %args);
+
+ $self->data_type($data_type) if $data_type;
+ $self->dataset_count($dataset_count) if $dataset_count;
+ $self->model($model) if $model;
+ $self->kappa($kappa) if $kappa;
+ $self->invar($invar) if $invar;
+ $self->category_number($category_number) if $category_number;
+ $self->alpha($alpha) if $alpha;
+ $self->tree($tree) if $tree;
+ $self->opt_topology($opt_topology) if $opt_topology;
+ $self->opt_lengths ($opt_lengths) if $opt_lengths;
+
+ return $self;
+}
+
+=head2 data_type
+
+ Title : data_type
+ Usage : $phyml->data_type('dna');
+ Function: Sets sequence alphabet to 'dna' or 'protein'
+ If leaved unset, will be set automatically
+ Returns : set value, defaults to 'protein'
+ Args : None to get, 'dna' or 'protein' to set.
+
+=cut
+
+sub data_type {
+ my ($self, $value) = @_;
+ if (defined $value) {
+ if ($value eq 'dna') {
+ $self->{_data_type} = '0';
+ } else {
+ $self->{_data_type} = '1';
+ }
+ }
+ return '1' unless defined $self->{_data_type};
+ return $self->{_data_type};
+}
+
+
+=head2 dataset_count
+
+ Title : dataset_count
+ Usage : $phyml->dataset_count(3);
+ Function: Sets dataset number to deal with
+ Returns : set value, defaults to 1
+ Args : None to get, positive integer to set.
+
+=cut
+
+sub dataset_count {
+ my ($self, $value) = @_;
+ if (defined $value) {
+ die "Invalid positive integer [$value]"
+ unless $value =~ /^[-+]?\d*$/ and $value > 0;
+ $self->{_dataset_count} = $value;
+ }
+ return $self->{_dataset_count} || 1;
+}
+
+
+
+=head2 model
+
+ Title : model
+ Usage : $phyml->model('HKY');
+ Function: Choose the substitution model to use. One of
+
+ JC69 | K2P | F81 | HKY | F84 | TN93 | GTR (DNA)
+ JTT | MtREV | Dayhoff | WAG (Amino-Acids)
+
+ Returns : Name of the model, defaults to {HKY|JTT}
+ Args : None to get, string to set.
+
+=cut
+
+sub model {
+ my ($self, $value) = @_;
+ if (defined ($value)) {
+ $self->throw("Not a valid model name [$value] for current data type (alphabet)")
+ unless $models->{$self->data_type}->{$value};
+ $self->{_model} = $value;
+ }
+
+ if ($self->{_model}) {
+ return $self->{_model};
+ }
+ elsif ($self->data_type) {
+ return 'JTT'; # protein
+ } else {
+ return 'HKY'; # DNA
+ }
+}
+
+
+=head2 kappa
+
+ Title : kappa
+ Usage : $phyml->kappa(4);
+ Function: Sets transition/transversion ratio, leave unset to estimate
+ Returns : set value, defaults to 'e'
+ Args : None to get, float or integer to set.
+
+=cut
+
+sub kappa {
+ my ($self, $value) = @_;
+ if (defined $value) {
+ die "Invalid number [$value]"
+ unless $value =~ /^[-+]?\d*\.?\d*$/ or $value eq 'e';
+ $self->{_kappa} = $value;
+ }
+ return 'e' unless defined $self->{_kappa};
+ return 'e' if $self->{_kappa} eq 'e';
+ return sprintf("%.1f", $self->{_kappa});
+}
+
+
+=head2 invar
+
+ Title : invar
+ Usage : $phyml->invar(.3);
+ Function: Sets proportion of invariable sites, leave unset to estimate
+ Returns : set value, defaults to 'e'
+ Args : None to get, float or integer to set.
+
+=cut
+
+sub invar {
+ my ($self, $value) = @_;
+ if (defined $value) {
+ die "Invalid number [$value]"
+ unless $value =~ /^[-+]?\d*\.\d*$/ or $value eq 'e';
+ $self->{_invar} = $value;
+ }
+ return 'e' unless defined $self->{_invar};
+ return 'e' if $self->{_invar} eq 'e';
+ return sprintf("%.1f", $self->{_invar});
+}
+
+
+
+=head2 category_number
+
@@ Diff output truncated at 10000 characters. @@
From cjfields at dev.open-bio.org Tue May 27 13:52:48 2008
From: cjfields at dev.open-bio.org (Christopher John Fields)
Date: Tue, 27 May 2008 13:52:48 -0400
Subject: [Bioperl-guts-l] [14682] bioperl-live/trunk/Bio: RT#12802 (via CPAN
RT)
Message-ID: <200805271752.m4RHqmRr011317@dev.open-bio.org>
Revision: 14682
Author: cjfields
Date: 2008-05-27 13:52:48 -0400 (Tue, 27 May 2008)
Log Message:
-----------
RT#12802 (via CPAN RT)
Modified Paths:
--------------
bioperl-live/trunk/Bio/Graphics/Glyph/arrow.pm
bioperl-live/trunk/Bio/Graphics/Glyph/redgreen_segment.pm
bioperl-live/trunk/Bio/Graphics/Glyph.pm
bioperl-live/trunk/Bio/Location/SplitLocationI.pm
Modified: bioperl-live/trunk/Bio/Graphics/Glyph/arrow.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Glyph/arrow.pm 2008-05-27 10:15:48 UTC (rev 14681)
+++ bioperl-live/trunk/Bio/Graphics/Glyph/arrow.pm 2008-05-27 17:52:48 UTC (rev 14682)
@@ -308,11 +308,11 @@
=head1 NAME
-Ace::Graphics::Glyph::arrow - The "arrow" glyph
+Bio::Graphics::Glyph::arrow - The "arrow" glyph
=head1 SYNOPSIS
- See L and L.
+ See L and L.
=head1 DESCRIPTION
Modified: bioperl-live/trunk/Bio/Graphics/Glyph/redgreen_segment.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Glyph/redgreen_segment.pm 2008-05-27 10:15:48 UTC (rev 14681)
+++ bioperl-live/trunk/Bio/Graphics/Glyph/redgreen_segment.pm 2008-05-27 17:52:48 UTC (rev 14682)
@@ -72,7 +72,7 @@
=head1 NAME
-Bio::Graphics::Glyph::redgreen_segments - The "redgreen_segments" glyph
+Bio::Graphics::Glyph::redgreen_segment - The "redgreen_segments" glyph
=head1 SYNOPSIS
Modified: bioperl-live/trunk/Bio/Graphics/Glyph.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Glyph.pm 2008-05-27 10:15:48 UTC (rev 14681)
+++ bioperl-live/trunk/Bio/Graphics/Glyph.pm 2008-05-27 17:52:48 UTC (rev 14682)
@@ -1406,7 +1406,7 @@
Add the list of features to the glyph, creating subparts. This is
most common done with the track glyph returned by
-Ace::Graphics::Panel-Eadd_track().
+Bio::Graphics::Panel-Eadd_track().
=item $feature = $glyph-Eadd_group(@features)
Modified: bioperl-live/trunk/Bio/Location/SplitLocationI.pm
===================================================================
--- bioperl-live/trunk/Bio/Location/SplitLocationI.pm 2008-05-27 10:15:48 UTC (rev 14681)
+++ bioperl-live/trunk/Bio/Location/SplitLocationI.pm 2008-05-27 17:52:48 UTC (rev 14682)
@@ -21,9 +21,8 @@
my $count = 1;
# print the start/end points of the sub locations
- foreach my $location ( sort { $a->start <=> $b->start }
- @sublocs ) {
- printf "sub feature %d [%d..%d]\n", $location->start,$location->end;
+ foreach my $location ( sort { $a->start <=> $b->start } @sublocs ) {
+ printf "sub feature %d [%d..%d]\n", $location->start,$location->end;
$count++;
}
From bugzilla-daemon at portal.open-bio.org Tue May 27 14:56:06 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Tue, 27 May 2008 14:56:06 -0400
Subject: [Bioperl-guts-l] [Bug 2504] New: Bug in Bio::SearchIO module
Message-ID:
http://bugzilla.open-bio.org/show_bug.cgi?id=2504
Summary: Bug in Bio::SearchIO module
Product: BioPerl
Version: unspecified
Platform: PC
OS/Version: Linux
Status: NEW
Severity: normal
Priority: P2
Component: Bio::Search/Bio::SearchIO
AssignedTo: bioperl-guts-l at bioperl.org
ReportedBy: kashi.mail at gmail.com
Hi,
I think there is a small bug in the Bio::SearchIO module. I am parsing
the BLAST output file using this module. It works great except for one
thing.
I have included a part of the blast outputfile ( I have modified the
lines to fit into this box). Most of the times the score of Sequences
producing significant alignments is in the format of 6.149e+04. This
module picks up only 6 and ignores other digits, otherwise it works fine.
Can you please look into this for me.
Thank you in advance
Kashi
Attached: The sample Blast output file is here
====================================================================
BLASTN 2.2.15 [Oct-15-2006]
Query= Contig_1011 (31,018 letters)
Database: scaffold_3.fsa
84 sequences; 3,615,155 total letters
Searching..................................................done
Score E
Sequences producing significant alignments: (bits) Value
Contig_1011 6.149e+04 0.0
Contig_8873 2397 0.0
Contig_1482 2042 0.0
Contig_9461 1475 0.0
Contig_1977 339 7e-92
>Contig_1011
Length = 31018
Score = 6.149e+04 bits (31018), Expect = 0.0
Identities = 31018/31018 (100%)
Strand = Plus / Plus
Query: 1
ttcaacaaacacgtatttctgaatgaaattgtttagagtttgttgaaggtcacgatcag 60
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 1
ttcaacaaacacgtatttctgaatgaaattgtttagagtttgttgaaggtcacgatcag 60
Query: 61
gctcatagaccagcggtcctgaaagaggattgcctttaagtttgttggaaaaaacgatta 120
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 61
gctcatagaccagcggtcctgaaagaggattgcctttaagtttgttggaaaaaacgatta 120
====================================================================
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Tue May 27 21:40:12 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Tue, 27 May 2008 21:40:12 -0400
Subject: [Bioperl-guts-l] [Bug 2505] New: Add in a sort feature for
SeqFeatureI get_all_tags()
Message-ID:
http://bugzilla.open-bio.org/show_bug.cgi?id=2505
Summary: Add in a sort feature for SeqFeatureI get_all_tags()
Product: BioPerl
Version: unspecified
Platform: All
OS/Version: Mac OS
Status: NEW
Severity: enhancement
Priority: P2
Component: Core Components
AssignedTo: bioperl-guts-l at bioperl.org
ReportedBy: cjfields at uiuc.edu
Add in some functionality to sort SeqFeature tags.
per the mail list:
http://bioperl.org/pipermail/bioperl-l/2008-May/027732.html
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Wed May 28 11:50:36 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Wed, 28 May 2008 11:50:36 -0400
Subject: [Bioperl-guts-l] [Bug 2504] Bug in Bio::SearchIO module
In-Reply-To:
Message-ID: <200805281550.m4SFoab0026854@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2504
kashi.mail at gmail.com changed:
What |Removed |Added
----------------------------------------------------------------------------
Severity|normal |major
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Wed May 28 12:44:56 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Wed, 28 May 2008 12:44:56 -0400
Subject: [Bioperl-guts-l] [Bug 2504] Bug in Bio::SearchIO module
In-Reply-To:
Message-ID: <200805281644.m4SGiuGN030103@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2504
------- Comment #1 from cjfields at uiuc.edu 2008-05-28 12:44 EST -------
You need to attach the BLAST report (preferably a full report, not a partial
one) using the 'Create a New Attachment' link above. Copy-paste doesn't work;
note the text wrapping issue.
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From bugzilla-daemon at portal.open-bio.org Wed May 28 13:43:01 2008
From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org)
Date: Wed, 28 May 2008 13:43:01 -0400
Subject: [Bioperl-guts-l] [Bug 2504] Bug in Bio::SearchIO module
In-Reply-To:
Message-ID: <200805281743.m4SHh17B001104@portal.open-bio.org>
http://bugzilla.open-bio.org/show_bug.cgi?id=2504
------- Comment #2 from kashi.mail at gmail.com 2008-05-28 13:43 EST -------
Created an attachment (id=925)
--> (http://bugzilla.open-bio.org/attachment.cgi?id=925&action=view)
sample Blast file
--
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.
From miraceti at dev.open-bio.org Wed May 28 15:42:18 2008
From: miraceti at dev.open-bio.org (miraceti at dev.open-bio.org)
Date: Wed, 28 May 2008 15:42:18 -0400
Subject: [Bioperl-guts-l] [14683] bioperl-live/trunk/Bio/Tree: New module to
handle phyloxml
Message-ID: <200805281942.m4SJgIS5013573@dev.open-bio.org>
Revision: 14683
Author: miraceti
Date: 2008-05-28 15:42:17 -0400 (Wed, 28 May 2008)
Log Message:
-----------
New module to handle phyloxml
Added Paths:
-----------
bioperl-live/trunk/Bio/Tree/NodePhyloXML.pm
bioperl-live/trunk/Bio/TreeIO/PhyloXMLEventBuilder.pm
bioperl-live/trunk/Bio/TreeIO/phyloxml.pm
Added: bioperl-live/trunk/Bio/Tree/NodePhyloXML.pm
===================================================================
--- bioperl-live/trunk/Bio/Tree/NodePhyloXML.pm (rev 0)
+++ bioperl-live/trunk/Bio/Tree/NodePhyloXML.pm 2008-05-28 19:42:17 UTC (rev 14683)
@@ -0,0 +1,173 @@
+# $Id: NodePhyloXML.pm 11508 2007-06-23 01:38:32Z jason $
+#
+# BioPerl module for Bio::Tree::NodePhyloXML
+#
+# Cared for by Mira Han
+#
+# Copyright Mira Han
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tree::NodePhyloXML - A Simple Tree Node with support for PhyloXML tags
+
+=head1 SYNOPSIS
+
+ use Bio::Tree::NodePhyloXML;
+ my $nodeA = Bio::Tree::NodePhyloXML->new();
+ my $nodeL = Bio::Tree::NodePhyloXML->new();
+ my $nodeR = Bio::Tree::NodePhyloXML->new();
+
+ my $node = Bio::Tree::NodePhyloXML->new();
+ $node->add_Descendents($nodeL);
+ $node->add_Descendents($nodeR);
+
+ print "node is not a leaf \n" if( $node->is_leaf);
+
+=head1 DESCRIPTION
+
+Makes a Tree Node with PhyloXML tags, suitable for building a Tree. See
+L for a full list of functionality.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+ http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Mira Han
+
+Email mirhan at indiana.edu
+
+=head1 CONTRIBUTORS
+
+The PhyloXML format was created by Chris Zmasek,
+and is described at:
+
+ http://www.phyloxml.org/
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+package Bio::Tree::NodePhyloXML;
+use strict;
+
+
+use base qw(Bio::Tree::Node);
+
+=head2 new
+
+ Title : new
+ Usage : my $obj = Bio::Tree::NodePhyloXML->new();
+ Function: Builds a new Bio::Tree::NodePhyloXML object
+ Returns : Bio::Tree::NodePhyloXML
+ Args : -left => pointer to Left descendent (optional)
+ -right => pointer to Right descenent (optional)
+ -branch_length => branch length [integer] (optional)
+ -bootstrap => bootstrap value (string)
+ -description => description of node
+ -id => unique id for node
+ -user_tag => hashref of PhyloXML tags and values
+
+=cut
+
+sub new {
+ my($class, at args) = @_;
+
+ my $self = $class->SUPER::new(@args);
+ my ($user_tag) = $self->_rearrange([qw(PhyloXML)], @args);
+ $self->_tag($user_tag);
+ return $self;
+}
+
+sub DESTROY {
+ my ($self) = @_;
+ # try to insure that everything is cleaned up
+ $self->SUPER::DESTROY();
+ if( defined $self->{'_desc'} &&
+ ref($self->{'_desc'}) =~ /ARRAY/i ) {
+ while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
+ $node->{'_ancestor'} = undef; # insure no circular references
+ $node->DESTROY();
+ $node = undef;
+ }
+ $self->{'_desc'} = {};
+ }
+}
+
+sub to_string{
+ my ($self) = @_;
+ my @tags = $self->get_all_tags;
+ my $tagstr = '';
+ if( @tags ) {
+ $tagstr = '[' . join(":", "&&PhyloXML",
+ map { "$_=" .join(',',
+ $self->get_tag_values($_))}
+ @tags ) . ']';
+ }
+ return sprintf("%s%s%s",
+ defined $self->id ? $self->id : '',
+ defined $self->branch_length ? ':' .
+ $self->branch_length : ' ',
+ $tagstr);
+}
+
+=head2 _tag
+
+ Title : _tag
+ Usage : my $tag = $nodephyloXML->_tag(%tags);
+ Function: Set tag-value pairs for PhyloXML nodes
+ Returns : none
+ Args : hashref to update the tags/value pairs
+ OR
+ with a scalar value update the bootstrap value by default
+
+
+=cut
+
+sub _tag {
+ my ($self, $tags) = @_;
+ if (defined $tags && (ref($tags) =~ /HASH/i)) {
+ while( my ($tag,$val) = each %$tags ) {
+ if( ref($val) =~ /ARRAY/i ) {
+ for my $v ( @$val ) {
+ $self->add_tag_value($tag,$v);
+ }
+ } else {
+ $self->add_tag_value($tag,$val);
+ }
+ }
+ if (exists $tags->{'B'}) {
+ $self->bootstrap($tags->{'B'});
+ }
+ } elsif (defined $tags and ! ref ($tags)) {
+ $self->debug( "here with $tags\n");
+ # bootstrap by default
+ $self->bootstrap($tags);
+ }
+}
+
+1;
Added: bioperl-live/trunk/Bio/TreeIO/PhyloXMLEventBuilder.pm
===================================================================
--- bioperl-live/trunk/Bio/TreeIO/PhyloXMLEventBuilder.pm (rev 0)
+++ bioperl-live/trunk/Bio/TreeIO/PhyloXMLEventBuilder.pm 2008-05-28 19:42:17 UTC (rev 14683)
@@ -0,0 +1,354 @@
+# $Id: PhyloXMLEventBuilder.pm 11480 2007-06-14 14:16:21Z sendu $
+#
+# BioPerl module for Bio::TreeIO::PhyloXMLEventBuilder
+#
+# Cared for by Mira Han
+#
+# Copyright Mira Han
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::TreeIO::PhyloXMLEventBuilder - Build Bio::Tree::Tree's and
+ Bio::Tree::Node's from Events
+
+=head1 SYNOPSIS
+
+# internal use only
+
+=head1 DESCRIPTION
+
+This object will take events and build a Bio::Tree::TreeI compliant
+object makde up of Bio::Tree::NodeI objects.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Mira Han
+
+Email mirhan at indiana.edu
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::TreeIO::PhyloXMLEventBuilder;
+use strict;
+
+use Bio::Tree::Tree;
+use Bio::Tree::Node;
+
+use base qw(Bio::Root::Root Bio::Event::EventHandlerI Bio::TreeIO::TreeEventBuilder);
+
+=head2 new
+
+ Title : new
+ Usage : my $obj = Bio::TreeIO::PhyloXMLEventBuilder->new();
+ Function: Builds a new Bio::TreeIO::PhyloXMLEventBuilder object
+ Returns : Bio::TreeIO::PhyloXMLEventBuilder
+ Args :
+
+
+=cut
+
+sub new {
+ my($class, at args) = @_;
+
+ my $self = $class->SUPER::new(@args);
+ my ($treetype, $nodetype) = $self->_rearrange([qw(TREETYPE
+ NODETYPE)], @args);
+ $treetype ||= 'Bio::Tree::Tree';
+ $nodetype ||= 'Bio::Tree::NodePhyloXML';
+
+ eval {
+ $self->_load_module($treetype);
+ $self->_load_module($nodetype);
+ };
+
+ if( $@ ) {
+ $self->throw("Could not load module $treetype or $nodetype. \n$@\n")
+ }
+ $self->treetype($treetype);
+ $self->nodetype($nodetype);
+ $self->{'_treelevel'} = 0;
+ $self->debug("Creating obj PhyloXMLEventBuilder\n");
+ $self->debug("nodetype: $nodetype\n");
+ return $self;
+}
+
+=head2 treetype
+
+ Title : treetype
+ Usage : $obj->treetype($newval)
+ Function:
+ Returns : value of treetype
+ Args : newvalue (optional)
+
+
+=cut
+
+sub treetype{
+ my ($self,$value) = @_;
+ if( defined $value) {
+ $self->{'treetype'} = $value;
+ }
+ return $self->{'treetype'};
+}
+
+=head2 nodetype
+
+ Title : nodetype
+ Usage : $obj->nodetype($newval)
+ Function:
+ Returns : value of nodetype
+ Args : newvalue (optional)
+
+
+=cut
+
+sub nodetype{
+ my ($self,$value) = @_;
+ if( defined $value) {
+ $self->{'nodetype'} = $value;
+ }
+ return $self->{'nodetype'};
+}
+
+
+=head2 SAX methods
+
+=cut
+
+=head2 start_document
+
+ Title : start_document
+ Usage : $handler->start_document
+ Function: Begins a Tree event cycle
+ Returns : none
+ Args : none
+
+=cut
+
+sub start_document {
+ my ($self) = @_;
+ $self->{'_lastitem'} = {};
+ $self->{'_currentitems'} = [];
+ $self->{'_currentnodes'} = [];
+ return;
+}
+
+=head2 end_document
+
+ Title : end_document
+ Usage : my @trees = $parser->end_document
+ Function: Finishes a Phylogeny cycle
+ Returns : An array Bio::Tree::TreeI
+ Args : none
+
+=cut
+
+sub end_document {
+ my ($self,$label) = @_;
+ my $root = $self->nodetype->new(
+ -id => $label,
+ -verbose => $self->verbose);
+# aggregate the nodes into trees basically ad-hoc.
+ while ( @{$self->{'_currentnodes'}} ) {
+ my ($node) = ( shift @{$self->{'_currentnodes'}});
+ $root->add_Descendent($node);
+ }
+
+ $self->debug("Root node is " . $root->to_string()."\n");
+ if( $self->verbose > 0 ) {
+ foreach my $node ( $root->get_Descendents ) {
+ $self->debug("node is ". $node->to_string(). "\n");
+ }
+ }
+ my $tree = $self->treetype->new(-verbose => $self->verbose,
+ -root => $root);
+ return $tree;
+}
+
+=head2 start_element
+
+ Title : start_element
+ Usage :
+ Function:
+ Example :
+ Returns :
+ Args : $data => hashref with key 'Name'
+
+=cut
+
+sub start_element
+{
+ my ($self,$data) =@_;
+ $self->{'_lastitem'}->{$data->{'Name'}}++;
+
+ $self->debug("starting element: $data->{Name}\n");
+ push @{$self->{'_lastitem'}->{'current'}},$data->{'Name'};
+
+ my %data;
+
+ if( $data->{'Name'} eq 'clade' )
+ {
+ push @{$self->{'_currentitems'}}, \%data;
+ }
+ elsif ( $data->{'Name'} eq 'phylogeny' )
+ {
+ $self->{'_treelevel'}++;
+ }
+}
+
+=head2 end_element
+
+ Title : end_element
+ Usage :
+ Function:
+ Returns : none
+ Args : $data => hashref with key 'Name'
+
+=cut
+
+sub end_element{
+ my ($self,$data) = @_;
+
+ $self->debug("end of element: $data->{Name}\n");
+# this is the stack where we push/pop items from it
+ my $curcount = scalar @{$self->{'_currentnodes'}};
@@ Diff output truncated at 10000 characters. @@
From miraceti at dev.open-bio.org Wed May 28 15:47:45 2008
From: miraceti at dev.open-bio.org (miraceti at dev.open-bio.org)
Date: Wed, 28 May 2008 15:47:45 -0400
Subject: [Bioperl-guts-l] [14684] bioperl-live/trunk/t: test scripts for
phyloxml modules
Message-ID: <200805281947.m4SJljlD013604@dev.open-bio.org>
Revision: 14684
Author: miraceti
Date: 2008-05-28 15:47:45 -0400 (Wed, 28 May 2008)
Log Message:
-----------
test scripts for phyloxml modules
Added Paths:
-----------
bioperl-live/trunk/t/data/phyloxml_small.xml
bioperl-live/trunk/t/phyloxml.t
Added: bioperl-live/trunk/t/data/phyloxml_small.xml
===================================================================
--- bioperl-live/trunk/t/data/phyloxml_small.xml (rev 0)
+++ bioperl-live/trunk/t/data/phyloxml_small.xml 2008-05-28 19:47:45 UTC (rev 14684)
@@ -0,0 +1,24 @@
+
+
+
+
+ example from Prof. Joe Felsenstein's book "Inferring Phylogenies"
+ phyloXML allows to use either a "distance" attribute or element to indicate branch lengths.
+
+
+
+ A
+
+
+ B
+
+
+
+ C
+
+
+
+
+
Added: bioperl-live/trunk/t/phyloxml.t
===================================================================
--- bioperl-live/trunk/t/phyloxml.t (rev 0)
+++ bioperl-live/trunk/t/phyloxml.t 2008-05-28 19:47:45 UTC (rev 14684)
@@ -0,0 +1,35 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: phyloxml.t 14580 2008-03-01 17:01:30Z cjfields $
+
+use strict;
+
+BEGIN {
+ use lib 't/lib';
+ use BioperlTest;
+
+ test_begin(-tests => 4);
+
+ use_ok('Bio::TreeIO');
+}
+
+my $verbose = test_debug();
+
+ok my $treeio = Bio::TreeIO->new(
+ -verbose => $verbose,
+ -format => 'phyloxml',
+ -file => test_input_file('phyloxml_small.xml'));
+
+my $tree = $treeio->next_tree;
+isa_ok($tree, 'Bio::Tree::TreeI');
+
+TODO: {
+ local $TODO = 'write_tree not implemented yet';
+ my $FILE1 = test_output_file();
+ $treeio = Bio::TreeIO->new(-verbose => $verbose,
+ -format => 'phyloxml',
+ -file => ">$FILE1");
+ $treeio->write_tree($tree);
+ undef $treeio;
+ ok( -s $FILE1 );
+}
+
From heikki at dev.open-bio.org Thu May 29 05:55:30 2008
From: heikki at dev.open-bio.org (Heikki Lehvaslaiho)
Date: Thu, 29 May 2008 05:55:30 -0400
Subject: [Bioperl-guts-l] [14685] bioperl-live/trunk: Added support for long
IDs (give -longid => 1 to constuctor).
Message-ID: <200805290955.m4T9tUdm015021@dev.open-bio.org>
Revision: 14685
Author: heikki
Date: 2008-05-29 05:55:29 -0400 (Thu, 29 May 2008)
Log Message:
-----------
Added support for long IDs (give -longid => 1 to constuctor). They can be optionally surrounded by single quotes. A warning is printed out if the length of an ID is over 50 characters long. Updated docs to show that both interleaved and sequential formats can be read and written.
Modified Paths:
--------------
bioperl-live/trunk/Bio/AlignIO/phylip.pm
bioperl-live/trunk/t/AlignIO.t
Added Paths:
-----------
bioperl-live/trunk/t/data/protpars_longid.phy
Modified: bioperl-live/trunk/Bio/AlignIO/phylip.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/phylip.pm 2008-05-28 19:47:45 UTC (rev 14684)
+++ bioperl-live/trunk/Bio/AlignIO/phylip.pm 2008-05-29 09:55:29 UTC (rev 14685)
@@ -45,11 +45,13 @@
=head1 DESCRIPTION
This object can transform Bio::SimpleAlign objects to and from PHYLIP
-interleaved format. It will not work with PHYLIP sequencial format.
+fotmat. By deafult it works with the interleaved format. By specifying
+the flag -interleaved =E 0 in the initialization the module can
+read or write data in sequential format.
-This module will output PHYLIP sequential format. By specifying the
-flag -interleaved =E 0 in the initialization the module can output
-data in interleaved format.
+Long IDs up to 50 characters are supported by flag -longid =E
+1. ID strings can be surrounded by single quoted. They are mandatory
+only if the IDs contain spaces.
=head1 FEEDBACK
@@ -102,8 +104,8 @@
Args : [specific for writing of phylip format files]
-idlength => integer - length of the id (will pad w/
spaces if needed)
- -interleaved => boolean - whether or not write as interleaved
- or sequential format
+ -interleaved => boolean - whether interleaved
+ or sequential format required
-line_length => integer of how long a sequence lines should be
-idlinebreak => insert a line break after the sequence id
so that sequence starts on the next line
@@ -115,6 +117,7 @@
-wrap_sequential => boolean for whether or not sequential
format should be broken up or a single line
default is false (single line)
+ -longid => boolean for allowing arbitrary long IDs (default is false)
=cut
@@ -123,21 +126,23 @@
$self->SUPER::_initialize(@args);
my ($interleave,$linelen,$idlinebreak,
- $idlength, $flag_SI, $tag_length,$ws) =
+ $idlength, $flag_SI, $tag_length,$ws, $longid) =
$self->_rearrange([qw(INTERLEAVED
LINE_LENGTH
IDLINEBREAK
IDLENGTH
FLAG_SI
TAG_LENGTH
- WRAP_SEQUENTIAL)], at args);
- $self->interleaved(1) if( $interleave || ! defined $interleave);
+ WRAP_SEQUENTIAL
+ LONGID)], at args);
+ $self->interleaved($interleave ? 1 : 0) if defined $interleave;
$self->idlength($idlength || $DEFAULTIDLENGTH);
$self->id_linebreak(1) if( $idlinebreak );
$self->line_length($linelen) if defined $linelen && $linelen > 0;
$self->flag_SI(1) if ( $flag_SI );
$self->tag_length($tag_length) if ( $tag_length || $DEFAULTTAGLEN );
$self->wrap_sequential($ws ? 1 : 0);
+ $self->longid($longid ? 1 : 0);
1;
}
@@ -176,14 +181,31 @@
$self->_pushback($entry);
last;
}
- if( $entry =~ /^\s+(.+)$/ ) {
+ if( $self->longid && $entry =~ /\w/ ) {
+ if ($entry =~ /'/) {
+ $entry =~ /^\s*'([^']+)'\s+(.+)$/;
+ $name = $1;
+ $str = $2;
+ } else {
+ $entry =~ /^\s*([^\s]+)\s+(.+)$/;
+ $name = $1;
+ $str = $2;
+ }
+# $name =~ s/[\s\/]/_/g; # not sure how wise is it to do this
+ $name =~ s/_+$//; # remove any trailing _'s
+
+ push @names, $name;
+ $str =~ s/\s//g;
+ $count = scalar @names;
+ $hash{$count} = $str;
+
+ } elsif( $entry =~ /^\s+(.+)$/ ) {
$interleaved = 0;
$str = $1;
$str =~ s/\s//g;
$count = scalar @names;
$hash{$count} .= $str;
-
- } elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ ||
+ } elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ ||
$entry =~ /^(.{$idlen})(\S{$idlen}\s+.+)\s$/ # Handle weirdnes s when id is too long
) {
$name = $1;
@@ -310,14 +332,19 @@
$tag_length = $self->tag_length();
foreach $seq ( $aln->each_seq() ) {
$name = $aln->displayname($seq->get_nse);
- $name = substr($name, 0, $idlength) if length($name) > $idlength;
- $name = sprintf("%-".$idlength."s",$name);
- if( $self->interleaved() ) {
- $name .= ' ' ;
- } elsif( $self->id_linebreak) {
- $name .= "\n";
+ if ($self->longid) {
+ $self->warn("The lenght of the name is over 50 chars long [$name]")
+ if length($name) > 50;
+ $name = "'$name' "
+ } else {
+ $name = substr($name, 0, $idlength) if length($name) > $idlength;
+ $name = sprintf("%-".$idlength."s",$name);
+ if( $self->interleaved() ) {
+ $name .= ' ' ;
+ } elsif( $self->id_linebreak) {
+ $name .= "\n";
+ }
}
-
#phylip needs dashes not dots
my $seq = $seq->seq();
$seq =~ s/\./-/g;
@@ -390,13 +417,14 @@
=cut
-sub interleaved{
+sub interleaved {
my ($self,$value) = @_;
- my $previous = $self->{'_interleaved'};
if( defined $value ) {
- $self->{'_interleaved'} = $value;
+ if ($value) {$self->{'_interleaved'} = 1 }
+ else {$self->{'_interleaved'} = 0 }
}
- return $previous;
+ return 1 unless defined $self->{'_interleaved'};
+ return $self->{'_interleaved'};
}
=head2 flag_SI
@@ -520,4 +548,23 @@
return $self->{'_wrap_sequential'} || 0;
}
+=head2 longid
+
+ Title : longid
+ Usage : $obj->longid($newval)
+ Function:
+ Returns : value of longid
+ Args : newvalue (optional)
+
+
+=cut
+
+sub longid{
+ my ($self,$value) = @_;
+ if( defined $value) {
+ $self->{'_longid'} = $value;
+ }
+ return $self->{'_longid'} || 0;
+}
+
1;
Modified: bioperl-live/trunk/t/AlignIO.t
===================================================================
--- bioperl-live/trunk/t/AlignIO.t 2008-05-28 19:47:45 UTC (rev 14684)
+++ bioperl-live/trunk/t/AlignIO.t 2008-05-29 09:55:29 UTC (rev 14685)
@@ -7,7 +7,7 @@
use lib 't/lib';
use BioperlTest;
- test_begin(-tests => 290);
+ test_begin(-tests => 294);
use_ok('Bio::AlignIO');
}
@@ -594,6 +594,19 @@
'GAAGAAATCTGTTGACTCAGATTGGTTGCACTTTAAATTTT' );
+# PHYLIP interleaved with long Ids
+$str = Bio::AlignIO->new(
+ '-file' => test_input_file("protpars_longid.phy"),
+ '-format' => 'phylip',
+ 'longid' => 1);
+
+isa_ok($str,'Bio::AlignIO');
+$aln = $str->next_aln();
+isa_ok($aln,'Bio::Align::AlignI');
+is $aln->get_seq_by_pos(1)->get_nse, 'S I N F R U P 0 0 1 /1-84';
+is $aln->get_seq_by_pos(2)->get_nse, 'SINFRUP002/1-84';
+
+
# LARGEMULTIFASTA
$str = Bio::AlignIO->new(
'-file' => test_input_file('little.largemultifasta'),
@@ -768,4 +781,4 @@
last;
}
is $status, 1, "filehandle output test : $format";
-}
\ No newline at end of file
+}
Added: bioperl-live/trunk/t/data/protpars_longid.phy
===================================================================
--- bioperl-live/trunk/t/data/protpars_longid.phy (rev 0)
+++ bioperl-live/trunk/t/data/protpars_longid.phy 2008-05-29 09:55:29 UTC (rev 14685)
@@ -0,0 +1,8 @@
+ 3 100
+'S I N F R U P 0 0 1 ' .......... ......DDQV VLQCTASVLK EQIKLCLSCE GFGNRLCFLE
+ SINFRUP002 .......... ......DDQV VLQCTASVLK EQIKLCLSCE GFGNRLCFLE
+ENSP000003 .MGDAEGEDE VQFLRTDDEV VLQCSATVLK EQLKLCLAAE GFGNRLCFLE
+
+ TTSNAQNVPP DLAICTFILE QSLSVRALQE MLANTVEMTE AVDLDKWSSQ
+ TTSNAQNVPP DLAICTFILE QSLSVRALQE MLANTVEMTE AVDLDKWSSQ
+ PTSNAQNVPP DLAICCFVLE QSLSVRALQE MLANT..... .VEAGVESSQ
From heikki at dev.open-bio.org Thu May 29 05:58:19 2008
From: heikki at dev.open-bio.org (Heikki Lehvaslaiho)
Date: Thu, 29 May 2008 05:58:19 -0400
Subject: [Bioperl-guts-l] [14686] bioperl-run/trunk: Added forgotten
-data_format attribute.
Message-ID: <200805290958.m4T9wJJU015046@dev.open-bio.org>
Revision: 14686
Author: heikki
Date: 2008-05-29 05:58:19 -0400 (Thu, 29 May 2008)
Log Message:
-----------
Added forgotten -data_format attribute. Now uses -longid to format the MSA to phylip sequential format for phyml input.
Modified Paths:
--------------
bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm
bioperl-run/trunk/t/Phyml.t
Modified: bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm
===================================================================
--- bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-05-29 09:55:29 UTC (rev 14685)
+++ bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-05-29 09:58:19 UTC (rev 14686)
@@ -41,6 +41,10 @@
-opt_lengths => '1',
);
$factory = Bio::Tools::Run::Phylo::Phyml->new(%args);
+ # if you need the output files do
+ $factory->save_tempfiles(1);
+ $factory->tempdir($workdir);
+
# and get a Bio::Align::AlignI (SimpleAlign) object from somewhere
$tree = $factory->run($aln);
@@ -214,9 +218,10 @@
}
}
- my ($data_type, $dataset_count, $model, $kappa, $invar,
+ my ($data_type, $data_format, $dataset_count, $model, $kappa, $invar,
$category_number, $alpha, $tree, $opt_topology,
$opt_lengths) = $self->_rearrange([qw( DATA_TYPE
+ DATA_FORMAT
DATASET_COUNT
MODEL
KAPPA
@@ -228,6 +233,7 @@
OPT_LENGTHS)], %args);
$self->data_type($data_type) if $data_type;
+ $self->data_format($data_format) if $data_format;
$self->dataset_count($dataset_count) if $dataset_count;
$self->model($model) if $model;
$self->kappa($kappa) if $kappa;
@@ -241,6 +247,7 @@
return $self;
}
+
=head2 data_type
Title : data_type
@@ -266,6 +273,27 @@
}
+=head2 data_format
+
+ Title : data_format
+ Usage : $phyml->data_format('dna');
+ Function: Sets PHYLIP format to 'i' interleaved or
+ 's' sequential
+ Returns : set value, defaults to 'i'
+ Args : None to get, 'i' or 's' to set.
+
+=cut
+
+sub data_format {
+ my ($self, $value) = @_;
+ if (defined $value) {
+ $self->throw("PHYLIP format must be 'i' or 's'")
+ unless $value eq 'i' or $value eq 's';
+ $self->{_data_format} = $value;
+ }
+ return $self->{_data_format} || 'i';
+}
+
=head2 dataset_count
Title : dataset_count
@@ -484,20 +512,19 @@
=head2 run
Title : run
- Usage : $factory->run($stockholm_file);
+ Usage : $factory->run($aln_file);
$factory->run($align_object);
Function: Runs Phyml to generate a tree
Returns : Bio::Tree::Tree object
- Args : file name for your input alignment in stockholm format, OR
- Bio::Align::AlignI complient object (eg. Bio::SimpleAlign).
+ Args : file name for your input alignment in a format
+ recognised by AlignIO, OR Bio::Align::AlignI
+ complient object (eg. Bio::SimpleAlign).
=cut
sub run {
my ($self, $in) = @_;
- #print "----------------$in---------\n";
-
if (ref $in && $in->isa("Bio::Align::AlignI")) {
$in = $self->_write_phylip_align_file($in);
}
@@ -509,7 +536,7 @@
copy ($in, $self->tempdir);
my $name = File::Spec->splitpath($in); # name is the last item in the array
$in = File::Spec->catfile($self->tempdir, $name);
- }
+ }
return $self->_run($in);
}
@@ -602,7 +629,7 @@
my $self = shift;
my $param_string = ' ' . $self->data_type;
- $param_string .= ' i'; # support only 'interleaved'
+ $param_string .= ' '. $self->data_format;
$param_string .= ' '. $self->dataset_count;
$param_string .= ' 0'; # no bootstap sets
@@ -640,15 +667,15 @@
sub _write_phylip_align_file {
my ($self, $align) = @_;
- my ($tfh, $tempfile) = $self->io->tempfile(-dir=>$self->tempdir);
-
- my $out = Bio::AlignIO->new('-fh' => $tfh,
- '-format' => 'phylip');
+ my $tempfile = File::Spec->catfile($self->tempdir, "aln$$.phylip");
+ $self->data_format('i');
+ my $out = Bio::AlignIO->new('-file' => ">$tempfile",
+ '-format' => 'phylip',
+ '-interleaved' => 0,
+ '-longid' => 1 );
$out->write_aln($align);
$out->close();
$out = undef;
- close($tfh);
- undef $tfh;
return $tempfile;
}
Modified: bioperl-run/trunk/t/Phyml.t
===================================================================
--- bioperl-run/trunk/t/Phyml.t 2008-05-29 09:55:29 UTC (rev 14685)
+++ bioperl-run/trunk/t/Phyml.t 2008-05-29 09:58:19 UTC (rev 14686)
@@ -10,7 +10,7 @@
}
use Test::More;
- plan tests => 37;
+ plan tests => 40;
use_ok('Bio::Tools::Run::Phylo::Phyml');
use_ok('Bio::AlignIO');
@@ -34,9 +34,12 @@
#use Data::Dumper; print Dumper $factory;
is ($factory->data_type('protein'), '1', 'data_type, protein');
+is ($factory->data_format, 'i', 'data_format, default');
+is ($factory->data_format('s'), 's', 'data_format, sequential');
+is ($factory->data_format('i'), 'i', 'data_format, interleaved');
is ($factory->dataset_count, 1, 'dataset_count, default');
-is ($factory->dataset_count(2), 2, 'data_type, 2');
+is ($factory->dataset_count(2), 2, 'data_count, 2');
is ($factory->model, 'JTT', 'model, default');
is ($factory->model('WAG'), 'WAG', 'model');
@@ -104,9 +107,11 @@
-tree => 'BIONJ',
-opt_topology => '0',
-opt_lengths => '1',
+ -verbose => 0
);
$factory = Bio::Tools::Run::Phylo::Phyml->new(%args);
+ $factory->save_tempfiles(1);
$tree = $factory->run($aln);
@leaves = $tree->get_leaf_nodes;
is (@leaves, 5, 'Result tree from DNA SimpleAlign input had correct number of leaves');
From lstein at dev.open-bio.org Thu May 29 10:17:43 2008
From: lstein at dev.open-bio.org (Lincoln Stein)
Date: Thu, 29 May 2008 10:17:43 -0400
Subject: [Bioperl-guts-l] [14687] bioperl-live/trunk/Bio: added some more
special cases to the gene glyph so that it "behaves right" with unusual
data
Message-ID: <200805291417.m4TEHhkn015720@dev.open-bio.org>
Revision: 14687
Author: lstein
Date: 2008-05-29 10:17:43 -0400 (Thu, 29 May 2008)
Log Message:
-----------
added some more special cases to the gene glyph so that it "behaves right" with unusual data
Modified Paths:
--------------
bioperl-live/trunk/Bio/DB/SeqFeature/NormalizedFeature.pm
bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
bioperl-live/trunk/Bio/Graphics/Glyph/gene.pm
bioperl-live/trunk/Bio/Graphics/Glyph/generic.pm
bioperl-live/trunk/Bio/Graphics/Glyph/segments.pm
Added Paths:
-----------
bioperl-live/trunk/Bio/DB/GFF/Aggregator/gene.pm
Added: bioperl-live/trunk/Bio/DB/GFF/Aggregator/gene.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/GFF/Aggregator/gene.pm (rev 0)
+++ bioperl-live/trunk/Bio/DB/GFF/Aggregator/gene.pm 2008-05-29 14:17:43 UTC (rev 14687)
@@ -0,0 +1,108 @@
+=head1 NAME
+
+Bio::DB::GFF::Aggregator::gene -- Sequence Ontology Geene
+
+=head1 SYNOPSIS
+
+ use Bio::DB::GFF;
+
+ # Open the sequence database
+ my $db = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
+ -dsn => 'dbi:mysql:elegans42',
+ -aggregator => ['gene'],
+ );
+
+ ------------------------------------------------------------------------
+ Aggregator method: gene
+ Main method: mRNA
+ Sub methods: CDS exon five_prime_UTR three_prime_UTR transcription_start_site polyA_site
+ ------------------------------------------------------------------------
+
+=head1 DESCRIPTION
+
+Bio::DB::GFF::Aggregator::gene is identical to so_transcript, but is
+used in those cases where you would like the name of the aggregated
+feature to be "gene" rather than "processed_transcript". It aggregates
+raw "exon," "CDS", "five_prime_UTR", "three_prime_UTR",
+"transcription_start_site" and "polyA_site" features into "mRNA"
+features. The UTRs may also be named "untranslated_region,"
+"five_prime_untranslated_region," "three_prime_untranslated_region,",
+"5'-UTR," and other synonyms.
+
+=cut
+
+package Bio::DB::GFF::Aggregator::gene;
+
+use strict;
+
+use base qw(Bio::DB::GFF::Aggregator);
+
+=head2 method
+
+ Title : method
+ Usage : $aggregator->method
+ Function: return the method for the composite object
+ Returns : the string "gene"
+ Args : none
+ Status : Public
+
+=cut
+
+sub method { 'gene' }
+
+=head2 part_names
+
+ Title : part_names
+ Usage : $aggregator->part_names
+ Function: return the methods for the sub-parts
+ Returns : the list CDS 5'-UTR 3'-UTR transcription_start_site polyA_site
+ Args : none
+ Status : Public
+
+=cut
+
+sub part_names {
+ return qw(CDS transcription_start_site
+ polyA_site UTR five_prime_untranslated_region
+ three_prime_untranslated_region
+ five_prime_UTR three_prime_UTR exon);
+}
+
+=head2 main_name
+
+ Title : main_name
+ Usage : $aggregator->main_name
+ Function: return the method for the main component
+ Returns : the string "mRNA"
+ Args : none
+ Status : Public
+
+=cut
+
+sub main_name {
+ return 'mRNA';
+}
+
+1;
+__END__
+
+=head1 BUGS
+
+None reported.
+
+
+=head1 SEE ALSO
+
+L, L
+
+=head1 AUTHOR
+
+Lincoln Stein Elstein at cshl.orgE.
+
+Copyright (c) 2008 Cold Spring Harbor Laboratory.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
+
Modified: bioperl-live/trunk/Bio/DB/SeqFeature/NormalizedFeature.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/NormalizedFeature.pm 2008-05-29 09:58:19 UTC (rev 14686)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/NormalizedFeature.pm 2008-05-29 14:17:43 UTC (rev 14687)
@@ -451,17 +451,20 @@
# freakish fixing of our non-standard Target attribute
if (my $t = ($self->attributes('Target'))[0]) {
my ($seqid,$tstart,$tend,$strand) = split /\s+/,$t;
- my $min_tstart = $tstart;
- my $max_tend = $tend;
- for my $seg (@$segs) {
- my $st = ($seg->attributes('Target'))[0] or next;
- (undef,$tstart,$tend) = split /\s+/,$st;
- $min_tstart = $tstart if $tstart < $min_tstart;
- $max_tend = $tend if $tend > $max_tend;
+ if (defined $tstart && defined $tend) {
+ my $min_tstart = $tstart;
+ my $max_tend = $tend;
+ for my $seg (@$segs) {
+ my $st = ($seg->attributes('Target'))[0] or next;
+ (undef,$tstart,$tend) = split /\s+/,$st;
+ next unless defined $tstart && defined $tend;
+ $min_tstart = $tstart if $tstart < $min_tstart;
+ $max_tend = $tend if $tend > $max_tend;
+ }
+ if ($min_tstart < $tstart or $max_tend > $tend) {
+ $self->{attributes}{Target}[0] = join ' ',($seqid,$min_tstart,$max_tend,$strand||'');
+ }
}
- if ($min_tstart < $tstart or $max_tend > $tend) {
- $self->{attributes}{Target}[0] = join ' ',($seqid,$min_tstart,$max_tend,$strand||'');
- }
}
}
Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-05-29 09:58:19 UTC (rev 14686)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-05-29 14:17:43 UTC (rev 14687)
@@ -472,7 +472,8 @@
# $unreserved->{ID}= $reserved->{ID} if exists $reserved->{ID};
# TEMPORARY HACKS TO SIMPLIFY DEBUGGING
- push @{$unreserved->{Alias}},$feature_id if $has_loadid;
+ $feature_id ||= ''; $name ||= ''; # prevent uninit variable warnings
+ push @{$unreserved->{Alias}},$feature_id if $has_loadid && $feature_id ne $name;
$unreserved->{parent_id} = \@parent_ids if @parent_ids;
# POSSIBLY A PERMANENT HACK -- TARGETS BECOME ALIASES
Modified: bioperl-live/trunk/Bio/Graphics/Glyph/gene.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Glyph/gene.pm 2008-05-29 09:58:19 UTC (rev 14686)
+++ bioperl-live/trunk/Bio/Graphics/Glyph/gene.pm 2008-05-29 14:17:43 UTC (rev 14687)
@@ -44,9 +44,9 @@
sub bump {
my $self = shift;
- return 1
+ return 1 # top level bumps, other levels don't unless specified in config
if $self->{level} == 0
- && lc $self->feature->primary_tag eq 'gene'; # top level bumps, other levels don't unless specified in config
+ && lc $self->feature->primary_tag eq 'gene';
return $self->SUPER::bump;
}
@@ -94,13 +94,16 @@
sub _subfeat {
my $class = shift;
my $feature = shift;
- if (lc $feature->primary_tag eq 'gene') {
+
+ if ($feature->primary_tag =~ /^gene/i) {
my @transcripts;
for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene/) {
push @transcripts, $feature->get_SeqFeatures($t);
}
- return @transcripts;
- } elsif (lc $feature->primary_tag eq 'cds') {
+ return @transcripts
+ ? @transcripts
+ : $feature->get_SeqFeatures; # no transcripts?! Return whatever's there.
+ } elsif ($feature->primary_tag =~ /^CDS/i) {
my @parts = $feature->get_SeqFeatures();
return ($feature) if $class->{level} == 0 and !@parts;
return @parts;
@@ -110,7 +113,7 @@
if ($class->option('sub_part')) {
@subparts = $feature->get_SeqFeatures($class->option('sub_part'));
}
- elsif ($feature->primary_tag eq 'mRNA') {
+ elsif ($feature->primary_tag =~ /^mRNA/i) {
@subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR));
}
else {
Modified: bioperl-live/trunk/Bio/Graphics/Glyph/generic.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Glyph/generic.pm 2008-05-29 09:58:19 UTC (rev 14686)
+++ bioperl-live/trunk/Bio/Graphics/Glyph/generic.pm 2008-05-29 14:17:43 UTC (rev 14687)
@@ -83,9 +83,11 @@
}
sub label {
my $self = shift;
+
return if $self->{overbumped}; # set by the bumper when we have hit bump limit
return unless $self->subpart_callbacks; # returns true if this is level 0 or if subpart callbacks allowed
return $self->_label if $self->{level} >= 0;
+
return exists $self->{label} ? $self->{label}
: ($self->{label} = $self->_label);
}
@@ -286,7 +288,9 @@
sub draw_label {
my $self = shift;
my ($gd,$left,$top,$partno,$total_parts) = @_;
+
my $label = $self->label or return;
+
my $x = $self->left + $left; # valid for both "top" and "left" because the left-hand side is defined by pad_left
my $font = $self->labelfont;
if ($self->label_position eq 'top') {
Modified: bioperl-live/trunk/Bio/Graphics/Glyph/segments.pm
===================================================================
--- bioperl-live/trunk/Bio/Graphics/Glyph/segments.pm 2008-05-29 09:58:19 UTC (rev 14686)
+++ bioperl-live/trunk/Bio/Graphics/Glyph/segments.pm 2008-05-29 14:17:43 UTC (rev 14687)
@@ -131,44 +131,6 @@
}
-# sub draw_component {
-# my $self = shift;
-# my ($gd,$l,$t) = @_;
-# $self->SUPER::draw_component(@_);
-# return unless $self->option('draw_protein_target') && $self->protein_fits;
-# my $hit = eval {$self->feature->hit} or return;
-# my $protein = uc eval {$hit->seq->seq} or return;
-# my ($left,$top,$right,$bottom) = $self->bounds($l,$t);
-
-# my $scale = $self->scale;
-# my @letters = split '',$protein;
-# my $color = $self->fgcolor;
-# my $font = $self->font;
-# my $fw = $font->width;
-# my $strand = $self->feature->strand || 0;
-
-# my $panel_left = $self->panel->left;
-# my $panel_right = $self->panel->right;
-
-# my ($x1,$x2) = $self->map_no_trunc($self->feature->start,$self->feature->end);
-
-# if ($strand >= 0) { # + strand features
-# for (0.. at letters-1) {
-# next if $x1 < $panel_left or $x1 > $panel_right;
-# $gd->char($font,$x1+1,$top,$letters[$_],$color);
-# } continue {
-# $x1 += $scale * 3;
-# }
-# } else { # - strand features
-# for (0.. at letters-1) {
-# next if $x2 < $panel_left or $x2 > $panel_right;
-# $gd->char($font,$x2+1,$top,$letters[$_],$color);
-# } continue {
-# $x2 -= $scale * 3;
-# }
-# }
-# }
-
sub draw_multiple_alignment {
my $self = shift;
my $gd = shift;