[Bioperl-l] Re: [Bioperl-guts-l] Notification: incoming/974

Roger Hall Roger Hall" <roger@iosea.com
Sat, 9 Jun 2001 19:17:08 -0700


This is a multi-part message in MIME format.

------=_NextPart_000_008C_01C0F118.C6A95870
Content-Type: text/plain;
	charset="iso-8859-1"
Content-Transfer-Encoding: 7bit

Can some test this? I added a C float switch to P in HSP.pm for this
case...stupid decision it is.

Please rename/backup your working HSP.pm first.

Thanks!

Roger

----- Original Message -----
From: "Saurabh D. Patel" <patels02@doc.mssm.edu>
To: "Hilmar Lapp" <hlapp@gmx.net>; <bioperl-l@bioperl.org>
Sent: Saturday, June 09, 2001 2:52 PM
Subject: [Bioperl-l] Re: [Bioperl-guts-l] Notification: incoming/974


> your perl one liner prints cool on my system too.  I looked a little in
> HSP.pm under the BPlite directory and noticed the lines:
>
> my ($p)        = $scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/;
> if (not defined $p) {($p) = $scoreline =~ /Expect = (\S+)/}
>
> compare with HSP.pm under the Blast directory:
>
> $p      = "1$p"      if defined $p and $p=~ /^e/i;
>
> I think adding this line would catch this case.
>
> Saurabh.
>
> Hilmar Lapp wrote:
>
> > Have you checked what $hsp->P() really returns?
> >
> >         perl -e '$x = "e-108"; if($x < 0.01) { print "cool\n"; } else {
> > print "uncool\n"; }'
> >
> > prints "cool" on my system (perl 5.005003). I think this is a
> > known parsing bug, but I'm not sure.
> >
> >         Hilmar
> > --
> > -----------------------------------------------------------------
> > Hilmar Lapp                              email: hilmarl@yahoo.com
> > GNF, San Diego, Ca. 92122                phone: +1 858 812 1757
> > -----------------------------------------------------------------
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l@bioperl.org
> http://bioperl.org/mailman/listinfo/bioperl-l

------=_NextPart_000_008C_01C0F118.C6A95870
Content-Type: application/octet-stream;
	name="HSP.pm"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="HSP.pm"

#########################################################################=
######
# Bio::Tools::BPlite::HSP
#########################################################################=
######
# HSP =3D High Scoring Pair (to all non-experts as I am)
#
# The original BPlite.pm module has been written by Ian Korf !
# see http://sapiens.wustl.edu/~ikorf
#
# You may distribute this module under the same terms as perl itself

package Bio::Tools::BPlite::HSP;

use vars qw(@ISA);
use strict;

# to disable overloading comment this out:
#use overload '""' =3D> '_overload';

# Object preamble - inheriets from Bio::SeqFeature::SimilarityPair

use Bio::SeqFeature::SimilarityPair;
use Bio::SeqFeature::Similarity;

@ISA =3D qw(Bio::SeqFeature::SimilarityPair);

sub new {
    my ($class, @args) =3D @_;

    # workaround to make sure frame is not set before strand is
    # interpreted from query/subject info=20
    # this workaround removes the key from the hash
    # so the superclass does not try and work with it
    # we'll take care of setting it in this module later on

    my %newargs =3D @args;
    foreach ( keys %newargs ) {
	if( /frame$/i ) {
	    delete $newargs{$_};
	}=20
    }
    # done with workaround

    my $self =3D $class->SUPER::new(%newargs);
   =20
    my ($score,$bits,$match,$positive,$p,$qb,$qe,$sb,$se,$qs,
	$ss,$hs,$qname,$sname,$qlength,$slength, $frame) =3D=20
	    $self->_rearrange([qw(SCORE
				  BITS
				  MATCH
				  POSITIVE
				  P
				  QUERYBEGIN
				  QUERYEND
				  SBJCTBEGIN
				  SBJCTEND
				  QUERYSEQ
				  SBJCTSEQ
				  HOMOLOGYSEQ
				  QUERYNAME
				  SBJCTNAME			   =20
				  QUERYLENGTH
				  SBJCTLENGTH
				  FRAME
				  )],@args);
   =20
    # Store the aligned query as sequence feature
    if ($qe > $qb) {		# normal query: start < end
	$self->query( Bio::SeqFeature::Similarity->new
		      (-start=3D>$qb, -end=3D>$qe, -strand=3D>1,=20
		       -source=3D>"BLAST" ) ) }
    else {			# reverse query (i dont know if this is possible, but feel =
free to correct)
	$self->query( Bio::SeqFeature::Similarity->new
		      (-start=3D>$qe, -end=3D>$qb, -strand=3D>-1,
		       -source=3D>"BLAST" ) ) }

    # store the aligned subject as sequence feature
    if ($se > $sb) {		# normal subject
	$self->subject( Bio::SeqFeature::Similarity->new
			(-start=3D>$sb, -end=3D>$se, -strand=3D>1,
			 -source=3D>"BLAST" ) ) }
    else {			# reverse subject: start bigger than end
	$self->subject( Bio::SeqFeature::Similarity->new
			(-start=3D>$se, -end=3D>$sb, -strand=3D>-1,=20
			 -source=3D>"BLAST" ) ) }
    # name the sequences
    $self->query->seqname($qname); # query
    $self->subject->seqname($sname); # subject

    # set lengths
    $self->query->seqlength($qlength); # query
    $self->subject->seqlength($slength); # subject

    # set object vars
    $self->score($score);
    $self->bits($bits);
    $self->significance($p);
    $self->query->frac_identical($match);
    $self->subject->frac_identical($match);
    $self->{'PERCENT'} =3D int((1000 * $match)/
			     $self->query->length)/10;
    $self->{'POSITIVE'} =3D $positive;
    $self->{'QS'} =3D $qs;
    $self->{'SS'} =3D $ss;
    $self->{'HS'} =3D $hs;
   =20
    defined $frame && $self->frame($frame);
    return $self;		# success - we hope!
}

# to disable overloading comment this out:
sub _overload {
	my $self =3D shift;
	return $self->start."..".$self->end." ".$self->bits;
}

=3Dhead2 P

 Title    : P
 Usage    : $hsp->P();
 Function : returns the P (significance) value for a HSP=20
 Example  :=20
 Returns  : (double) significance value
 Args     :

=3Dcut

sub P               {
		    my $float =3D shift->significance(@_);
			my $match =3D '([+-]?)(?=3D\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # =
Perl Cookbook 2.1
		    if ($float =3D~ /^$match$/) {
			    # Is a C float
			    return $float;
			} elsif ("1$float" =3D~ /^$match$/) {
			    # Almost C float, Jitterbug 974
			    return "1$float";
			# Handle another odd number format
			# } elsif ("HACK" =3D~ /^HACKMATCH$/) {
			#     # Almost C float, Jitterbug X
			#     return "HACK";
		    } else {
				print "Warning: $float is not a known number format. Returning zero. =
\n";
				return 0;
		    }
					}

=3Dhead2 percent

 Title    : percent
 Usage    : $hsp->percent();
 Function : returns the percent matching=20
 Returns  : (double) percent matching
 Args     : none

=3Dcut

sub percent         {shift->{'PERCENT'}}

=3Dhead2 match

 Title    : match
 Usage    : $hsp->match();
 Function : returns the match
 Example  :=20
 Returns  : (double) frac_identical=20
 Args     :

=3Dcut

sub match           {shift->query->frac_identical(@_)}

=3Dhead2 positive

 Title    : positive
 Usage    : $hsp->positive();
 Function : returns the number of positive hits=20
 Returns  : (int) number of positive residue hits=20
 Args     : none

=3Dcut

sub positive        {shift->{'POSITIVE'}}

=3Dhead2 querySeq

 Title    : querySeq
 Usage    : $hsp->querySeq();
 Function : returns the query sequence
 Returns  : (string) the Query Sequence=20
 Args     : none

=3Dcut

sub querySeq        {shift->{'QS'}}

=3Dhead2 sbjctSeq

 Title    : sbjctSeq
 Usage    : $hsp->sbjctSeq();
 Function : returns the Sbjct sequence=20
 Returns  : (string) the Sbjct Sequence=20
 Args     : none

=3Dcut

sub sbjctSeq        {shift->{'SS'}}

=3Dhead2 homologySeq

 Title    : homologySeq
 Usage    : $hsp->homologySeq();
 Function : returns the homologous sequence=20
 Returns  : (string) homologous sequence=20
 Args     : none

=3Dcut

sub homologySeq     {shift->{'HS'}}

=3Dhead2 qs

 Title    : qs
 Usage    : $hsp->qs();
 Function : returns the Query Sequence (same as querySeq)
 Returns  : (string) query Sequence=20
 Args     : none

=3Dcut

sub qs              {shift->{'QS'}}

=3Dhead2 ss

 Title    : ss
 Usage    : $hsp->ss();
 Function : returns the subject sequence ( same as sbjctSeq)=20
 Returns  : (string) Sbjct Sequence
 Args     : none

=3Dcut

sub ss              {shift->{'SS'}}

=3Dhead2 hs

 Title    : hs
 Usage    : $hsp->hs();
 Function : returns the Homologous Sequence (same as homologySeq )=20
 Returns  : (string) Homologous Sequence
 Args     : none

=3Dcut

sub hs              {shift->{'HS'}}


sub frame {
    my ($self, $frame) =3D @_;
    if( defined $frame ) {
	if( $frame =3D=3D 0 ) {
	    $frame =3D undef;
	} elsif( $frame !~ /^([+-])?([1-3])/ ) {	   =20
	    $self->warn("Specifying an invalid frame ($frame)");
	    $frame =3D undef;
	} else {=20
	    if( ($1 eq '-' && $self->subject->strand >=3D 0) ||
		($1 eq '+' && $self->subject->strand <=3D 0) ) {
		$self->warn("Frame ($frame) did not match strand of query match (".
			    $self->subject->strand().")");
	    }
	    $frame =3D $2 - 1;
	}
    }
    return $self->SUPER::frame($frame);
}
1;

------=_NextPart_000_008C_01C0F118.C6A95870--