[Bioperl-l] Bio::RangeI intersection and Bio::DB::GFF

Marco Blanchette mblanche at berkeley.edu
Tue May 2 19:30:49 UTC 2006


Dear all--

I have been trying to use the intersection function to extract overlapping
region from alternatively spliced exons as in the following script. The
returned object from the 'my $overlap = $exon1->intersection($exon2);' is
actually loosing the strand of $exon1 if $exon1 is from the negative strand.
Is this behavior expected? Should I check the strand of $exon1 before
working on the object return by any Bio::RangeI function?

Many thanks 

#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::GFF;

MAIN:{

    my $db = Bio::DB::GFF->new( -adaptor => 'dbi::mysql',
                                -dsn =>
'dbi:mysql:database=dmel_43_LS;host=riolab.net',
                                -user => 'guest');
    my $test_db = $db->segment('4');
    
    # Load up the exons into $exons_p
    for my $gene ($test_db->features(-types => 'gene')){

        my $exons_p = extractExons($gene);

        cluster($exons_p) unless ($#{$exons_p} == -1);

    }
}

sub extractExons {
    my $gene = shift;
    my %ex_list;
    my @tcs = $gene->features(    -type =>'processed_transcript',
                                    -attributes =>{Gene => $gene->group});
                   
    for my $tc (@tcs){
        my @exons = $tc->features (-type => 'exon',
                                     -attributes => {Parent => $tc->group}
);        
    
        for (@exons){
            my $ex_id    = $_->id;
            $ex_list{$ex_id} = $_ unless (exists $ex_list{$ex_id});

        }
    
    }
    my @values = values %ex_list;
    return(\@values);
}

sub cluster {
    my $exons_p = shift;
    
    for (my $s = 0; $s <= $#{$exons_p}; $s++){
        for (my $t = $s+1; $t <= $#{$exons_p}; $t++){
            my $exon1 = $exons_p->[$s];
            my $exon2 = $exons_p->[$t];
            
            if (!($exon1->equals($exon2)) && $exon1->overlaps($exon2)){
            
                my $overlap = $exon1->intersection($exon2);
                
                print "===\n";;
                print "ex1\n", $exon1->seq, "\n";
                print "ex2\n", $exon2->seq, "\n";
                print "overlap\n", $overlap->seq, "\n";
            }
        }
    }
}
______________________________
Marco Blanchette, Ph.D.

mblanche at uclink.berkeley.edu

Donald C. Rio's lab
Department of Molecular and Cell Biology
16 Barker Hall
University of California
Berkeley, CA 94720-3204

Tel: (510) 642-1084
Cell: (510) 847-0996
Fax: (510) 642-6062
-- 






More information about the Bioperl-l mailing list