[Bioperl-l] Re: Bioperl tree class

Richard Copley copley@embl-heidelberg.de
Wed, 10 Oct 2001 18:23:17 +0200


This is a multi-part message in MIME format.
--------------080505080607050108010303
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit


>>Did you (or anyone else) get around to writing a bioperl object that can
>>read in PHYLIP format tree files? I am on the verge of writing one; it
>>wouldn't take long, but there's no sense duplicating effort.
>>
> 
> No, I haven't, neither has my group in Singapore, just mailing bioperl, to
> check if Jason or others are working on it.


I started writing one ages ago but didn't finish - I attach it on the off 
chance it's useful - realistically, it would take a lot of work. This uses 
recursion - not sure if this is the best way to do it. I also put in a 
function for drawing the tree using GD. Again this works in a limited fashion 
but is far from a production product.

The one thing I want to plead for any bioperl Tree class is that any 
implementation can deal with multifurcating trees. (Does that make them graphs?).


-- 
  Richard Copley
  EMBL
  Meyerhofstr.1
  69012 Heidelberg
  Germany
  Tel: +49 6221 387 534
  FAX: +49 6221 387 517


--------------080505080607050108010303
Content-Type: type=audio/x-pn-realaudio-plugin;
 name="Tree.pm"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="Tree.pm"


=head1 NAME

Tree - An object for parsing Newick style tree files

=head1 SYNOPSIS

   use Tree;
   
   $tree_string = '(((fly,worm,mosquito),(human,ape)),yeast);';
   $tree = Tree->new();
   $tree->parse_string($tree_string);

   Make a png:

   $png = $tree->GD_tree($height_per_leaf,$width_per_node,$border);

   Wish list - not implemented:

   Collapse branches not supported by bootstrap values:

   $tree->collapse_unsupported(750);
   implement by removing node, and replacing with kids using splice
   --- then require some trickery with n_kids counter.

   Get the highest node which has a given set of leaves under it:

   $node = $tree->highest_node_for_clade('cerevisiae','pombe');
   (should work for a single species)

   Rerooting a tree with a given outgroup:

   $tree->outgroup($node);

=head1 DESCRIPTION

Parses Newick style phylogenies (e.g. clustal *.ph, *.phb, *.dnd files) 
into a tree object. Supports branch lengths and bootstrap values. The tree
can be mulitfurcating - i.e. multiple branches per node are supported.

To create an image from a tree object:

   $png = $tree->GD_tree($height_per_leaf,$width_per_node,$border);

where dimensions are in pixels. width_per_node is actually a scaling 
factor, multiplied by the distance to the node - for trees without 
distances, this will be equivalent to the width in pixels.

=head1 AUTHOR

Richard Copley - Email copley@embl-heidelberg.de

=head1 APPENDIX

The rest of the documentation details each of the object methods. 

=cut

package Tree;
use strict;
use Carp;
use GD;

my $depth = 0;
my $pos = -1;
my $Node = 'node';
my $Leaf = 'leaf';

sub new {

   my $proto = shift;
   my $class = ref($proto) || $proto;
   my $self = {};
   bless ($self,$class);
   $self->{n_kids} = 0;
   $self->{name} = '';
   $self->{parent} = undef;
   return $self;

}

sub parse_string {

   my $self = shift;
   my $tree_string = shift;
 
   $depth = 0;
   $pos = -1;       # reset string index
   
   my @tree = split(//,$tree_string);

   $self->read_tree(\@tree);

   return;

}

sub read_tree {

   my $tree = shift;
   my $string = shift;
   
   $depth++;
   $pos++;
#   print "Entering: Depth: $depth, position: $pos\n";

   while(1){

      my $kid;
      if($string->[$pos] eq '('){
         $kid = $tree->new_kid();
         $kid->read_tree($string);
         $pos++;
         if($pos >= $#{$string}){ return }
      }

      my $buffer = '';

      while(($string->[$pos] ne ')') && ($string->[$pos] ne '(' ) ){

         $buffer .= $string->[$pos];
         $pos++;
 
         # a little defensiveness to stop if something goes wrong
         if(!defined($string->[$pos])){
            croak "Problem parsing tree file - check format";
         }

      }
   
#      print "Buffer contents: $buffer\n";
      my @leaves = split(/,/,$buffer);

      foreach my $leaf (@leaves){

         next if ($leaf eq '');

         my $dist = 1;
         my $bootstrap = undef;
         if($leaf =~ /:(\d+\.\d+)/){
            $dist = $1;
         }
         if($leaf =~ /\[(\d+)\]/){
            $bootstrap = $1;
         }
         if($leaf =~ /^:/){
            # just a distance for the current node - not a leaf
            $kid->{dist} = $dist;
            $kid->{boot} = $bootstrap;
            next;
         } else {
            $leaf =~ s/:.*//;
         }

         $tree->new_kid( 'name' => $leaf,
                         'boot' => $bootstrap,
                         'dist' => $dist,
                         'type' => $Leaf,
                       );

      }

      if($string->[$pos] eq ')'){
#         print "Leaving: Depth: $depth, position: $pos\n";
         $depth--;
         return;
      }

      if($pos >= $#{$string}){ 
         die "Fell through - problem parsing tree\n";
      }

#      print "Looping: Depth: $depth, position: $pos\n";

      # if we fall through to here, we must have a '('
      # the while loop will take us straight back to a read_tree call

   }

}

sub new_kid {

   my $parent = shift;
   my %params = @_;
   my $class = ref($parent) || $parent;
   my $kid = {};
   bless ($kid,$class);

   my $n_kids = $parent->{n_kids};
   $parent->{n_kids}++;

   $kid->{parent} = $parent;
   $kid->{n_kids} = 0;
   $kid->{type} = $Node; # everything a node until changed into a leaf
   $kid->{boot} = undef;
   $kid->{dist} = 1;

   $parent->{kid}[$n_kids] = $kid;

   while(( my $key, my $value) = each %params){
      $kid->{$key} = $value;
   } 

   return $kid;

}

=head2

 Title   : leaf_count
 Usage   : $no_of_leaves = $tree->leaf_count()
 Function: calculates the number of leaves under each node in a tree - 
           useful for working out how to display things
 Returns : the number of leaves under a node
 Args    : none

=cut

sub leaf_count {

   my $self = shift;

   if (@_) { $self->{n_leaves} = shift }

   if(defined($self->{n_leaves})){
      return $self->{n_leaves};
   }

   my $leaf_count = 0;

   foreach my $kid (@{$self->{kid}}){
      if($kid->{type} eq $Node){
         $leaf_count += $kid->leaf_count;
      } elsif($kid->{type} eq $Leaf){
         $kid->{n_leaves} = 1;
         $leaf_count++;
      } else {
         croak "Unknown type in count_leaves";
      }
   }

   $self->{n_leaves} = $leaf_count;

   return $leaf_count;

}

sub leaves {

   my $self = shift;
   my $leaves = shift;

   foreach my $kid (@{$self->{kid}}){
      if($kid->{type} eq $Node){
         $kid->leaves($leaves);
      } elsif($kid->{type} eq $Leaf){
         push(@{$leaves},$kid->{name});
      } else {
         croak "Unknown type in count_leaves";
      }
   }

   return;

}

sub kid_count {
   
   my $self = shift;
   if (@_) { $self->{n_kids} = shift }
   return $self->{n_kids};

}

sub name {

   my $self = shift;
   if (@_) { $self->{name} = shift }
   return $self->{name};

}

sub type {

   my $self = shift;
   if (@_) { $self->{type} = shift }
   return $self->{name};

}

=head2

 Title   : depth 
 Usage   : $depth = $tree->max_depth($current_depth,$$max_depth);
 Function: calculates the maximum depth of the tree
 Returns : the current depth - the maximum depth under the node 
           is in the scalar ref of the second argument
 Args    : none

=cut

sub depth {

   my $self = shift;
   my $depth = shift;
   my $deepest = shift;

   $depth++;
   foreach my $kid (@{$self->{kid}}){
      $depth = $kid->depth($depth,$deepest);
      $depth--;
      if($depth > $$deepest){
         $$deepest = $depth;
      }
      print "depth: [$depth], deepest, ",$$deepest,"\n";
   }

   return $depth;

}

=head2

 Title   : reroot
 Usage   : $tree->reroot(leaf_name);
 Function: reroot the tree with the 'leaf_name' as the leftmost leaf
 Returns : ??
 Args    : none

=cut

sub reroot {
  
   my $self = shift;
   my $root_leaf = shift;

   my $root = $self->get_leaf($root_leaf);

}

=head2

 Title   : get_leaf
 Usage   : $tree->get_leaf('human');
 Function: get the object corresponding to the named leaf
 Returns : the leaf object
 Args    : the wanted leaf

=cut

sub get_leaf {

   my $self = shift;
   my $wanted_leaf = shift;

   foreach my $kid (@{$self->{kid}}){
      my $leaf = $self->get_leaf($wanted_leaf);
      if($leaf eq $wanted_leaf){
         return $leaf;
      }
   }

   return undef;

}

=head2

 Title   : GD_tree
 Usage   : $tree->GD_tree(height_per_leaf,width_per_node);
 Function: Make a png of the tree
 Returns : the binary image
 Args    : none

=cut


sub GD_tree {

   my $self = shift;
   my $height_per_leaf = shift;
   my $width_per_node = shift;
   my $border = shift;

   my $height = (( $self->leaf_count - 1 ) * $height_per_leaf );

   my $depth = 0;
   $self->depth(0,\$depth);

   my $length = $width_per_node * $depth;

   print "Height per leaf: [$height_per_leaf pixels]\n";
   my $im = new GD::Image($length+($border*3),$height+($border*2));

   my $colours = {};

   $colours->{white} = $im->colorAllocate(255,255,255);
   $colours->{black} = $im->colorAllocate(0,0,0);
   $colours->{red} = $im->colorAllocate(255,0,0);
   $colours->{blue} = $im->colorAllocate(0,0,255);

   $im->transparent($colours->{white});

   my $x = $border;
   
   $self->_draw_nodes($im,0,$border,$height_per_leaf,
                      $width_per_node,$border,$colours);

#   $self->{kid}[0]->_draw_nodes($im,0,$border,$height_per_leaf,
#                                 $width_per_node,$border,$colours);

   return $im;

}

sub _draw_nodes {

   my $self = shift;
   my $im = shift;
   my $top = shift;
   my $x = shift;
   my $height_per_leaf = shift;
   my $width_per_node = shift;
   my $border = shift;
   my $colours = shift;

   print "Kids: ",$self->kid_count,"\n";

   my $last_y;

   foreach my $kid (@{$self->{kid}}){

      my $leaf_count = $kid->leaf_count;

      my $bottom = (($leaf_count-1) * $height_per_leaf) + $top;

      my $node_height = $top + (($bottom - $top) / 2);
      my $leaf_height = $top;

      print "top: $top bottom: $bottom leaves: $leaf_count\n";

      my $x2 = $x+$kid->{dist}*$width_per_node;

      printf("Base: $bottom\n");

      if($kid->{type} eq $Node){

         print "node drawing at: $x,$node_height\n";
         $im->filledRectangle($x,($border+$node_height),
                              $x2,($border+$node_height+3),
                              $colours->{black});

         $kid->_draw_nodes($im,$top,$x2,$height_per_leaf,
                           $width_per_node,$border,$colours);

         if(defined($last_y)){
            $im->filledRectangle($x,($border+$last_y),
                                 $x+3,($border+$node_height),
                                 $colours->{black});
         }

         $last_y = $node_height;
         $top = $bottom + $height_per_leaf;

      } else {

         # leaf drawing code
         print "leaf drawing: x: [$x] y: [$leaf_height] ",$kid->{name},"\n";
         $im->filledRectangle($x,($border+$leaf_height),
                              $x2,($border+$leaf_height+3),
                              $colours->{blue});

         if(defined($last_y)){
            $im->filledRectangle($x,($border+$last_y),
                                 $x+3,($border+$leaf_height),
                                 $colours->{black});
         }
         $last_y = $leaf_height;

         $im->string(gdMediumBoldFont,
                     $x2+5,($border+$leaf_height-5),
                     $kid->{name},
                     $colours->{red});

         $top += $height_per_leaf;

      }
  
   }

   return;

}

1;

--------------080505080607050108010303--