[Bioperl-l] Re: Bioperl tree class
   
    Richard Copley
     
    copley@embl-heidelberg.de
       
    Wed, 10 Oct 2001 18:23:17 +0200
    
    
  
This is a multi-part message in MIME format.
--------------080505080607050108010303
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
>>Did you (or anyone else) get around to writing a bioperl object that can
>>read in PHYLIP format tree files? I am on the verge of writing one; it
>>wouldn't take long, but there's no sense duplicating effort.
>>
> 
> No, I haven't, neither has my group in Singapore, just mailing bioperl, to
> check if Jason or others are working on it.
I started writing one ages ago but didn't finish - I attach it on the off 
chance it's useful - realistically, it would take a lot of work. This uses 
recursion - not sure if this is the best way to do it. I also put in a 
function for drawing the tree using GD. Again this works in a limited fashion 
but is far from a production product.
The one thing I want to plead for any bioperl Tree class is that any 
implementation can deal with multifurcating trees. (Does that make them graphs?).
-- 
  Richard Copley
  EMBL
  Meyerhofstr.1
  69012 Heidelberg
  Germany
  Tel: +49 6221 387 534
  FAX: +49 6221 387 517
--------------080505080607050108010303
Content-Type: type=audio/x-pn-realaudio-plugin;
 name="Tree.pm"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="Tree.pm"
=head1 NAME
Tree - An object for parsing Newick style tree files
=head1 SYNOPSIS
   use Tree;
   
   $tree_string = '(((fly,worm,mosquito),(human,ape)),yeast);';
   $tree = Tree->new();
   $tree->parse_string($tree_string);
   Make a png:
   $png = $tree->GD_tree($height_per_leaf,$width_per_node,$border);
   Wish list - not implemented:
   Collapse branches not supported by bootstrap values:
   $tree->collapse_unsupported(750);
   implement by removing node, and replacing with kids using splice
   --- then require some trickery with n_kids counter.
   Get the highest node which has a given set of leaves under it:
   $node = $tree->highest_node_for_clade('cerevisiae','pombe');
   (should work for a single species)
   Rerooting a tree with a given outgroup:
   $tree->outgroup($node);
=head1 DESCRIPTION
Parses Newick style phylogenies (e.g. clustal *.ph, *.phb, *.dnd files) 
into a tree object. Supports branch lengths and bootstrap values. The tree
can be mulitfurcating - i.e. multiple branches per node are supported.
To create an image from a tree object:
   $png = $tree->GD_tree($height_per_leaf,$width_per_node,$border);
where dimensions are in pixels. width_per_node is actually a scaling 
factor, multiplied by the distance to the node - for trees without 
distances, this will be equivalent to the width in pixels.
=head1 AUTHOR
Richard Copley - Email copley@embl-heidelberg.de
=head1 APPENDIX
The rest of the documentation details each of the object methods. 
=cut
package Tree;
use strict;
use Carp;
use GD;
my $depth = 0;
my $pos = -1;
my $Node = 'node';
my $Leaf = 'leaf';
sub new {
   my $proto = shift;
   my $class = ref($proto) || $proto;
   my $self = {};
   bless ($self,$class);
   $self->{n_kids} = 0;
   $self->{name} = '';
   $self->{parent} = undef;
   return $self;
}
sub parse_string {
   my $self = shift;
   my $tree_string = shift;
 
   $depth = 0;
   $pos = -1;       # reset string index
   
   my @tree = split(//,$tree_string);
   $self->read_tree(\@tree);
   return;
}
sub read_tree {
   my $tree = shift;
   my $string = shift;
   
   $depth++;
   $pos++;
#   print "Entering: Depth: $depth, position: $pos\n";
   while(1){
      my $kid;
      if($string->[$pos] eq '('){
         $kid = $tree->new_kid();
         $kid->read_tree($string);
         $pos++;
         if($pos >= $#{$string}){ return }
      }
      my $buffer = '';
      while(($string->[$pos] ne ')') && ($string->[$pos] ne '(' ) ){
         $buffer .= $string->[$pos];
         $pos++;
 
         # a little defensiveness to stop if something goes wrong
         if(!defined($string->[$pos])){
            croak "Problem parsing tree file - check format";
         }
      }
   
#      print "Buffer contents: $buffer\n";
      my @leaves = split(/,/,$buffer);
      foreach my $leaf (@leaves){
         next if ($leaf eq '');
         my $dist = 1;
         my $bootstrap = undef;
         if($leaf =~ /:(\d+\.\d+)/){
            $dist = $1;
         }
         if($leaf =~ /\[(\d+)\]/){
            $bootstrap = $1;
         }
         if($leaf =~ /^:/){
            # just a distance for the current node - not a leaf
            $kid->{dist} = $dist;
            $kid->{boot} = $bootstrap;
            next;
         } else {
            $leaf =~ s/:.*//;
         }
         $tree->new_kid( 'name' => $leaf,
                         'boot' => $bootstrap,
                         'dist' => $dist,
                         'type' => $Leaf,
                       );
      }
      if($string->[$pos] eq ')'){
#         print "Leaving: Depth: $depth, position: $pos\n";
         $depth--;
         return;
      }
      if($pos >= $#{$string}){ 
         die "Fell through - problem parsing tree\n";
      }
#      print "Looping: Depth: $depth, position: $pos\n";
      # if we fall through to here, we must have a '('
      # the while loop will take us straight back to a read_tree call
   }
}
sub new_kid {
   my $parent = shift;
   my %params = @_;
   my $class = ref($parent) || $parent;
   my $kid = {};
   bless ($kid,$class);
   my $n_kids = $parent->{n_kids};
   $parent->{n_kids}++;
   $kid->{parent} = $parent;
   $kid->{n_kids} = 0;
   $kid->{type} = $Node; # everything a node until changed into a leaf
   $kid->{boot} = undef;
   $kid->{dist} = 1;
   $parent->{kid}[$n_kids] = $kid;
   while(( my $key, my $value) = each %params){
      $kid->{$key} = $value;
   } 
   return $kid;
}
=head2
 Title   : leaf_count
 Usage   : $no_of_leaves = $tree->leaf_count()
 Function: calculates the number of leaves under each node in a tree - 
           useful for working out how to display things
 Returns : the number of leaves under a node
 Args    : none
=cut
sub leaf_count {
   my $self = shift;
   if (@_) { $self->{n_leaves} = shift }
   if(defined($self->{n_leaves})){
      return $self->{n_leaves};
   }
   my $leaf_count = 0;
   foreach my $kid (@{$self->{kid}}){
      if($kid->{type} eq $Node){
         $leaf_count += $kid->leaf_count;
      } elsif($kid->{type} eq $Leaf){
         $kid->{n_leaves} = 1;
         $leaf_count++;
      } else {
         croak "Unknown type in count_leaves";
      }
   }
   $self->{n_leaves} = $leaf_count;
   return $leaf_count;
}
sub leaves {
   my $self = shift;
   my $leaves = shift;
   foreach my $kid (@{$self->{kid}}){
      if($kid->{type} eq $Node){
         $kid->leaves($leaves);
      } elsif($kid->{type} eq $Leaf){
         push(@{$leaves},$kid->{name});
      } else {
         croak "Unknown type in count_leaves";
      }
   }
   return;
}
sub kid_count {
   
   my $self = shift;
   if (@_) { $self->{n_kids} = shift }
   return $self->{n_kids};
}
sub name {
   my $self = shift;
   if (@_) { $self->{name} = shift }
   return $self->{name};
}
sub type {
   my $self = shift;
   if (@_) { $self->{type} = shift }
   return $self->{name};
}
=head2
 Title   : depth 
 Usage   : $depth = $tree->max_depth($current_depth,$$max_depth);
 Function: calculates the maximum depth of the tree
 Returns : the current depth - the maximum depth under the node 
           is in the scalar ref of the second argument
 Args    : none
=cut
sub depth {
   my $self = shift;
   my $depth = shift;
   my $deepest = shift;
   $depth++;
   foreach my $kid (@{$self->{kid}}){
      $depth = $kid->depth($depth,$deepest);
      $depth--;
      if($depth > $$deepest){
         $$deepest = $depth;
      }
      print "depth: [$depth], deepest, ",$$deepest,"\n";
   }
   return $depth;
}
=head2
 Title   : reroot
 Usage   : $tree->reroot(leaf_name);
 Function: reroot the tree with the 'leaf_name' as the leftmost leaf
 Returns : ??
 Args    : none
=cut
sub reroot {
  
   my $self = shift;
   my $root_leaf = shift;
   my $root = $self->get_leaf($root_leaf);
}
=head2
 Title   : get_leaf
 Usage   : $tree->get_leaf('human');
 Function: get the object corresponding to the named leaf
 Returns : the leaf object
 Args    : the wanted leaf
=cut
sub get_leaf {
   my $self = shift;
   my $wanted_leaf = shift;
   foreach my $kid (@{$self->{kid}}){
      my $leaf = $self->get_leaf($wanted_leaf);
      if($leaf eq $wanted_leaf){
         return $leaf;
      }
   }
   return undef;
}
=head2
 Title   : GD_tree
 Usage   : $tree->GD_tree(height_per_leaf,width_per_node);
 Function: Make a png of the tree
 Returns : the binary image
 Args    : none
=cut
sub GD_tree {
   my $self = shift;
   my $height_per_leaf = shift;
   my $width_per_node = shift;
   my $border = shift;
   my $height = (( $self->leaf_count - 1 ) * $height_per_leaf );
   my $depth = 0;
   $self->depth(0,\$depth);
   my $length = $width_per_node * $depth;
   print "Height per leaf: [$height_per_leaf pixels]\n";
   my $im = new GD::Image($length+($border*3),$height+($border*2));
   my $colours = {};
   $colours->{white} = $im->colorAllocate(255,255,255);
   $colours->{black} = $im->colorAllocate(0,0,0);
   $colours->{red} = $im->colorAllocate(255,0,0);
   $colours->{blue} = $im->colorAllocate(0,0,255);
   $im->transparent($colours->{white});
   my $x = $border;
   
   $self->_draw_nodes($im,0,$border,$height_per_leaf,
                      $width_per_node,$border,$colours);
#   $self->{kid}[0]->_draw_nodes($im,0,$border,$height_per_leaf,
#                                 $width_per_node,$border,$colours);
   return $im;
}
sub _draw_nodes {
   my $self = shift;
   my $im = shift;
   my $top = shift;
   my $x = shift;
   my $height_per_leaf = shift;
   my $width_per_node = shift;
   my $border = shift;
   my $colours = shift;
   print "Kids: ",$self->kid_count,"\n";
   my $last_y;
   foreach my $kid (@{$self->{kid}}){
      my $leaf_count = $kid->leaf_count;
      my $bottom = (($leaf_count-1) * $height_per_leaf) + $top;
      my $node_height = $top + (($bottom - $top) / 2);
      my $leaf_height = $top;
      print "top: $top bottom: $bottom leaves: $leaf_count\n";
      my $x2 = $x+$kid->{dist}*$width_per_node;
      printf("Base: $bottom\n");
      if($kid->{type} eq $Node){
         print "node drawing at: $x,$node_height\n";
         $im->filledRectangle($x,($border+$node_height),
                              $x2,($border+$node_height+3),
                              $colours->{black});
         $kid->_draw_nodes($im,$top,$x2,$height_per_leaf,
                           $width_per_node,$border,$colours);
         if(defined($last_y)){
            $im->filledRectangle($x,($border+$last_y),
                                 $x+3,($border+$node_height),
                                 $colours->{black});
         }
         $last_y = $node_height;
         $top = $bottom + $height_per_leaf;
      } else {
         # leaf drawing code
         print "leaf drawing: x: [$x] y: [$leaf_height] ",$kid->{name},"\n";
         $im->filledRectangle($x,($border+$leaf_height),
                              $x2,($border+$leaf_height+3),
                              $colours->{blue});
         if(defined($last_y)){
            $im->filledRectangle($x,($border+$last_y),
                                 $x+3,($border+$leaf_height),
                                 $colours->{black});
         }
         $last_y = $leaf_height;
         $im->string(gdMediumBoldFont,
                     $x2+5,($border+$leaf_height-5),
                     $kid->{name},
                     $colours->{red});
         $top += $height_per_leaf;
      }
  
   }
   return;
}
1;
--------------080505080607050108010303--