[Bioperl-l] Need help for implementing a new TreeIO module

Guillaume Rousse rousse at ccr.jussieu.fr
Tue Jan 4 08:11:36 EST 2005


Jason Stajich wrote:
> If you want to build a node with two leaves, first you have to start 
> with a 'tree' section to tell the handler that this is nested data.
> Start a 'tree' event, build the node (like the section just above), then 
> build two leaf nodes (like the leaf node section above), then end the 
> 'tree' event.  'tree' is an unfortunate name for the event but don't 
> feel like changing it - a throwback from when I thought I'd only need an 
> initial 'tree' an just 'node' events.
> 
> $self->_eventHandler->start_document;
> $self->_eventHandler->start_element({'Name' => 'tree'});
> # do internal node
>   # do leaf node
>   # do leaf node
> $self->_eventHandler->end_element({'Name' => 'tree'});
> return $self->_eventHandler->end_document;
OK, done, but I still have an issue with each internal node connecting 
two leaves, producing a third intermediate leaf. I don't know if the 
problems comes from me or from bioperl. Here is my code, along with a 
test script.

I you don't want to install Algorithm::Cluster to test, the input data 
is something as:
  -1:   5   4   0.000
  -2:   7   6   0.000
  -3:  10  11   0.010
  -4:   2   0   0.090
  -5:  -3  12   0.095
  -6:   1  -4   0.115
  -7:  -5   9   0.143
  -8:  -1   3   0.250
  -9:  -2  -7   0.618
-10:  -8  -6   0.639
-11:   8 -10   5.805
-12:  -9 -11  28.056
Where the first column is internal node id, the second and third one the 
  children id for each node, and the fourth one the distance between the 
children.

I also patched svggraph to use parameters instead of hard-coded values, 
and also to allow some normalisation for the branches lengths, in such a 
way that it would be easy to add new normalisation functions, including 
arbitrary code. Patch attached too.
-- 
No flight ever leaves on time unless you are running late and need the 
delay to make the flight
		-- Murphy's Laws for Frequent Flyers n°1
-------------- next part --------------
#!/usr/bin/perl 

use Algorithm::Cluster;
use Bio::TreeIO;
use strict;

my $weight =  [ 1,1 ];

my $data =  [
	[ 1.1, 1.2 ],
	[ 1.4, 1.3 ],
	[ 1.1, 1.5 ],
	[ 2.0, 1.5 ],
	[ 1.7, 1.9 ],
	[ 1.7, 1.9 ],
	[ 5.7, 5.9 ],
	[ 5.7, 5.9 ],
	[ 3.1, 3.3 ],
	[ 5.4, 5.3 ],
	[ 5.1, 5.5 ],
	[ 5.0, 5.5 ],
	[ 5.1, 5.2 ],
];

my $mask =  [
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
	[ 1, 1 ],
];

my $labels = [ qw/a b c d e f g h i j k l m/ ];

my %params = (
	applyscale =>         0,
	transpose  =>         0,
	method     =>       'a',
	dist       =>       'e',
	data      =>    $data,
	mask      =>    $mask,
	weight    =>  $weight,
);

my ($result, $linkdist);
my ($i,$j);

($result, $linkdist) = Algorithm::Cluster::treecluster(%params);

$i=0;
foreach(@{$result}) {
	printf("%3d: %3d %3d %7.3f\n",-1-$i,$_->[0],$_->[1],$linkdist->[$i]);
	++$i;
}

my $in = new Bio::TreeIO(
    -format   => 'cluster',
    -result   => $result,
    -linkdist => $linkdist,
    -labels   => $labels,
);
my $out = new Bio::TreeIO(
    -format => 'svggraph',
    -file   => '>output.svg'
);
$out->write_tree($in->next_tree());
-------------- next part --------------
# $Id: nexus.pm,v 1.2 2003/12/06 18:10:26 jason Exp $
#
# BioPerl module for Bio::TreeIO::cluster
#
# Contributed by Guillaume Rousse <Guillaume-dot-Rousse-at-inria-dot-fr>
#
# Copyright INRIA
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::TreeIO::cluster - A TreeIO driver module for parsing Algorithm::Cluster::treecluster output

=head1 SYNOPSIS

  # do not use this module directly
  use Bio::TreeIO;
  use Algorithm::Cluster::treecluster;
  my ($result, $linkdist) = Algorithm::Cluster::treecluster(
    distances => $matrix
  );
  my $treeio = new Bio::TreeIO(
    -format   => 'cluster',
    -result   =>  $result,
    -linkdist => $linkdist,
    -labels   => $labels
  );
  my $tree = $treeio->next_tree;

=head1 DESCRIPTION

This is a driver module for parsing Algorithm::Cluster::treecluster output.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l at bioperl.org              - General discussion
  http://bioperl.org/MailList.shtml  - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://bugzilla.bioperl.org/

=head1 AUTHOR - Guillaume Rousse

Email Guillaume-dot-Rousse-at-inria-dot-fr

Describe contact details here

=head1 CONTRIBUTORS

Additional contributors names and emails here

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut


# Let the code begin...


package Bio::TreeIO::cluster;
use vars qw(@ISA);
use strict;

use Bio::TreeIO;
use Bio::Event::EventGeneratorI;
use IO::String;

@ISA = qw(Bio::TreeIO);

sub _initialize {
  my ($self, %args) = @_;
  $self->{_result}   = $args{'-result'};
  $self->{_linkdist} = $args{'-linkdist'};
  $self->{_labels}   = $args{'-labels'};
  $self->SUPER::_initialize(%args);
}

=head2 next_tree

 Title   : next_tree
 Usage   : my $tree = $treeio->next_tree
 Function: Gets the next tree in the stream
 Returns : Bio::Tree::TreeI
 Args    : none


=cut

sub next_tree {
    my ($self) = @_;

    $self->_eventHandler->start_document();

    # build tree from the root
    $self->_eventHandler->start_element({Name => 'tree'});
    $self->_recurse(-1, 0);
    $self->_recurse(-1, 1);
    $self->_eventHandler->end_element({Name => 'tree'});

    return $self->_eventHandler->end_document;
}

sub _recurse {
    my ($self, $line, $column) = @_;

    my $id  = $self->{_result}->[$line]->[$column];
    if ($id >= 0) {
	# leaf
	$self->debug("leaf $id\n");
	$self->debug("distance $self->{_linkdist}->[$line]\n");
	$self->debug("label $self->{_labels}->[$id]\n");
	$self->_eventHandler->start_element({Name => 'node'});
	$self->_eventHandler->start_element({Name => 'branch_length'});
	$self->_eventHandler->characters($self->{_linkdist}->[$line]);
	$self->_eventHandler->end_element({Name => 'branch_length'});
	$self->_eventHandler->start_element({Name => 'id'});
	$self->_eventHandler->characters($self->{_labels}->[$id]);
	$self->_eventHandler->end_element({Name => 'id'});
	$self->_eventHandler->start_element({Name => 'leaf'});
	$self->_eventHandler->characters(1);
	$self->_eventHandler->end_element({Name => 'leaf'});
	$self->_eventHandler->end_element({Name => 'node'});
    } else {
	# internal node
	$self->debug("internal node $id\n");
	$self->debug("distance $self->{_linkdist}->[$line]\n");
	$self->_eventHandler->start_element({Name => 'node'});
	$self->_eventHandler->start_element({Name => 'branch_length'});
	$self->_eventHandler->characters($self->{_linkdist}->[$line]);
	$self->_eventHandler->end_element({Name => 'branch_length'});
	$self->_eventHandler->start_element({Name => 'leaf'});
	$self->_eventHandler->characters(0);
	$self->_eventHandler->end_element({Name => 'leaf'});
	$self->_eventHandler->start_element({Name => 'tree'});
	my $child_id = - ($id + 1);
	$self->_recurse($child_id, 0);
	$self->_recurse($child_id, 1);
	$self->_eventHandler->end_element({Name => 'tree'});
	$self->_eventHandler->end_element({Name => 'node'});
    }
}

=head2 write_tree

 Title   : write_tree
 Usage   :
 Function: Sorry not possible with this format
 Returns : none
 Args    : none


=cut

sub write_tree{
    $_[0]->throw("Sorry the format 'cluster' can only be used as an input format");
}

1;
-------------- next part --------------
--- /usr/lib/perl5/vendor_perl/5.8.6/Bio/TreeIO/svggraph.pm	2003-11-28 07:27:16.000000000 +0100
+++ Bio/TreeIO/svggraph.pm	2005-01-04 13:57:14.265334869 +0100
@@ -86,22 +86,16 @@
 
 @ISA = qw(Bio::TreeIO );
 
-=head2 new
-
- Title   : new
- Usage   : my $obj = new Bio::TreeIO::svggraph();
- Function: Builds a new Bio::TreeIO::svggraph object 
- Returns : Bio::TreeIO::svggraph
- Args    :
-
-
-=cut
-
-sub new {
-  my($class, at args) = @_;
-
-  my $self = $class->SUPER::new(@args);
-
+sub _initialize {
+  my ($self, %args) = @_;
+  $self->{_width}        = $args{'-width'} || 1600;
+  $self->{_height}       = $args{'-height'} || 1000;
+  $self->{_margin}       = $args{'-margin'} || 30;
+  $self->{_stroke}       = $args{'-stroke'} || 'black';
+  $self->{_stroke_width} = $args{'-stroke_width'} || 2;
+  $self->{_font_size}    = $args{'-font_size'} || '10px';
+  $self->{_normalize}    = $args{'-normalize'};
+  $self->SUPER::_initialize(%args);
 }
 
 =head2 write_tree
@@ -116,28 +110,35 @@
 
 sub write_tree{
    my ($self,$tree) = @_;
-   my $line = _write_tree_Helper($tree->get_root_node);
+   my $line = $self->_write_tree_Helper($tree->get_root_node);
    $self->_print($line. "\n");
    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return;
 }
 
 sub _write_tree_Helper {
-   my ($node) = @_;
+   my ($self,$node) = @_;
 
-   #this needs to be parameterized
-   my $graph = SVG::Graph->new(width=>1600,height=>1000,margin=>30);
+   my $graph = SVG::Graph->new(
+       width  => $self->{_width},
+       height => $self->{_height},
+       margin => $self->{_margin}
+   );
 
    my $group0 = $graph->add_frame;
    my $tree = SVG::Graph::Data::Tree->new;
    my $root = SVG::Graph::Data::Node->new;
    $root->name($node->id);
-   _decorateRoot($root, $node->each_Descendent());
+   $self->_decorateRoot($root, $node->each_Descendent());
    $tree->root($root);
    $group0->add_data($tree);
 
-   #this needs to be parameterized
-   $group0->add_glyph('tree', stroke=>'black','stroke-width'=>2,'font-size'=>'10px');
+   $group0->add_glyph(
+       'tree',
+       'stroke'       => $self->{_stroke},
+       'stroke-width' => $self->{_stroke_width},
+       'font-size'    => $self->{_font_size}
+   );
 
    return($graph->draw);
 }
@@ -156,16 +157,21 @@
 =cut
 
 sub _decorateRoot{
-  my $previousNode = shift;
-  my @children = @_;
-   foreach my $child (@children)
-	 {
-	   my $currNode = SVG::Graph::Data::Node->new;
-	   $currNode->branch_label($child->id);
-	   $currNode->branch_length($child->branch_length);
-	   $previousNode->add_daughter($currNode);
-	   _decorateRoot($currNode, $child->each_Descendent());
-	 }
+  my ($self,$previousNode, at children) = @_;
+  foreach my $child (@children) {
+    my $currNode = SVG::Graph::Data::Node->new;
+    $currNode->branch_label($child->id);
+    my $length = $child->branch_length;
+    CASE: {
+      if ($self->{_normalize} eq 'log') {
+	$length = log($length + 1);
+	last CASE;
+      }
+    }
+    $currNode->branch_length($length);
+    $previousNode->add_daughter($currNode);
+    $self->_decorateRoot($currNode, $child->each_Descendent());
+  }
 }
 
 =head2 next_tree


More information about the Bioperl-l mailing list