From cjfields at dev.open-bio.org Thu Oct 2 17:04:10 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Thu, 2 Oct 2008 17:04:10 -0400 Subject: [Bioperl-guts-l] [14914] bioperl-live/trunk/Bio/AlignIO/xmfa.pm: clean up XMFA parsing, allow spaces in parsing (ende++, from IRC) Message-ID: <200810022104.m92L4Ako031630@dev.open-bio.org> Revision: 14914 Author: cjfields Date: 2008-10-02 17:04:09 -0400 (Thu, 02 Oct 2008) Log Message: ----------- clean up XMFA parsing, allow spaces in parsing (ende++, from IRC) Modified Paths: -------------- bioperl-live/trunk/Bio/AlignIO/xmfa.pm Modified: bioperl-live/trunk/Bio/AlignIO/xmfa.pm =================================================================== --- bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-09-27 05:12:08 UTC (rev 14913) +++ bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-10-02 21:04:09 UTC (rev 14914) @@ -78,94 +78,35 @@ my ($width) = $self->_rearrange([qw(WIDTH)], at _); $self->width($width || $WIDTH); - my ($start, $end, $strand, $name, $seqname, $seq, $seqchar, $entry, - $tempname, $tempdesc, %align, $desc, $maxlen, $extra); + my ($name, $tempname, $seqchar); my $aln = Bio::SimpleAlign->new(); - + my $seqs = 0; # alignments - while (defined ($entry = $self->_readline) ) { + while (defined (my $entry = $self->_readline) ) { chomp $entry; if ( index($entry, '=') == 0 ) { - if ($entry =~ m{score\s*=\s*(\d+)}) { + if (defined $name && $seqchar) { + my $seq = $self->_process_seq($name, $seqchar); + $aln->add_seq($seq); + } + if ($aln && $entry =~ m{score\s*=\s*(\d+)}) { $aln->score($1); } last; - } - if ( $entry =~ s{^>(.*)$}{} ) { - $tempname = $1; - chomp($entry); - $tempdesc = $entry; + } elsif ( $entry =~ m{^>.+$}xms) { if ( defined $name ) { - # put away last name and sequence - if ( $name =~ m{\d+:(\d+)-(\d+)\s([+-]{1})(?:\s+(\S+)\s*(.*))?} ) { - ($start, $end, $seqname, $extra) = ($1, $2, $4, $5); - $strand = ($3 eq '+') ? 1 : - ($3 eq '-') ? -1 : - 0; - } else { - $self->throw("Does not comform to XMFA format"); - } - $seq = Bio::LocatableSeq->new( - -strand => $strand, - -seq => $seqchar, - -display_id => $seqname, - -description => $extra, - -start => $start, - -end => $end, - ); + my $seq = $self->_process_seq($name, $seqchar); $aln->add_seq($seq); - $self->debug("Reading $seqname\n"); } - $desc = $tempdesc; - $name = $tempname; - $desc = $entry; - $seqchar = ""; - next; + $seqchar = ''; + $name = $entry; + } else { + $seqchar .= $entry; } - $seqchar .= $entry; } - # Next two lines are to silence warnings that - # otherwise occur at EOF when using <$fh> - $name = "" if (!defined $name); - $seqchar="" if (!defined $seqchar); - - # Put away last name and sequence - if ( $name =~ m{\d+:(\d+)-(\d+)\s([+-]{1})\s+(\S+)\s*(.*)} ) { - ($start, $end, $seqname, $extra) = ($1, $2, $4, $5); - $strand = ($3 eq '+') ? 1 : - ($3 eq '-') ? -1 : - 0; - } - - # If $end <= 0, we have either reached the end of - # file in <> or we have encountered some other error - if ( !defined $end || $end <= 0 ) { - undef $aln; - return $aln; - } - - # This logic now also reads empty lines at the - # end of the file. Skip this is seqchar and seqname is null - unless ( length($seqchar) == 0 && length($seqname) == 0 ) { - $seq = Bio::LocatableSeq->new(-seq => $seqchar, - -strand => $strand, - -display_id => $seqname, - -description => $extra, - -start => $start, - -end => $end, - ); - $aln->add_seq($seq); - $self->debug("Reading $seqname\n"); - } - my $alnlen = $aln->length; - foreach my $seq ( $aln->each_seq ) { - if ( $seq->length < $alnlen ) { - my ($diff) = ($alnlen - $seq->length); - $seq->seq( $seq->seq() . "-" x $diff); - } - } - return $aln; + return $aln if $aln->no_sequences; + return; } =head2 write_aln @@ -258,4 +199,28 @@ return $self->{'_width'} || $WIDTH; } +####### PRIVATE ####### + +sub _process_seq { + my ($self, $entry, $seq) = @_; + my ($start, $end, $strand, $seqname, $desc, $all); + # put away last name and sequence + if ( $entry =~ m{^>\s*\d+:(\d+)-(\d+)\s([+-]{1})(?:\s+(\S+)\s*(\S\.*)?)?} ) { + ($start, $end, $seqname, $desc) = ($1, $2, $4, $5); + $strand = ($4 eq '+') ? 1 : -1; + } else { + $self->throw("Line does not comform to XMFA format:\n$entry"); + } + my $seqobj = Bio::LocatableSeq->new( + -strand => $strand, + -seq => $seq, + -display_id => $seqname, + -description => $desc || $all, + -start => $start, + -end => $end, + ); + $self->debug("Reading $seqname\n"); + return $seqobj; +} + 1; From bugzilla-daemon at portal.open-bio.org Fri Oct 3 10:51:57 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 3 Oct 2008 10:51:57 -0400 Subject: [Bioperl-guts-l] [Bug 2610] New: Fastq module next_seq() entry split problem Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2610 Summary: Fastq module next_seq() entry split problem Product: BioPerl Version: 1.5 branch Platform: PC OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Bio::SeqIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: robert.davey at bbsrc.ac.uk I parse a fastq format file using the following code. my $stream = Bio::SeqIO->newFh(-format => 'fastq', -fh => \*CF); my $qualout = Bio::SeqIO->newFh(-format => 'qual', -fh => \*QF); my $fastaout = Bio::SeqIO->newFh(-format => 'fasta', -fh => \*OF); while (<$stream>) { print $fastaout $_; print $qualout $_; } This works fine for most instances, but only if the quality score line doesn't start with a '@' symbol. Most quality lines look like the following, starting with a '!' symbol: +CBS432-11f16.p1k bases 34 to 1019 !::A>>CIIIIIIIITTTYTNNIIHCCCDDDIIIN However, one of my entries starts thus: +CBS432-11f16.q1k bases 1 to 879 @9 at BIGGFINNNNTIFIFIIFFFIIIIIIIFDD In this instance, because next_seq() splits entries on '\n\@', the parser thinks that the @ denotes a new entry, and the regex fails, throwing an "Can't parse fastq entry" error. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Oct 3 11:43:13 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 3 Oct 2008 11:43:13 -0400 Subject: [Bioperl-guts-l] [Bug 2610] Fastq module next_seq() entry split problem In-Reply-To: Message-ID: <200810031543.m93FhDbX027798@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2610 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |INVALID ------- Comment #1 from cjfields at bioperl.org 2008-10-03 11:43 EST ------- See bug 2335. Already fixed in main trunk, recommebding an update from subversion. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From lstein at dev.open-bio.org Fri Oct 3 13:13:46 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Fri, 3 Oct 2008 13:13:46 -0400 Subject: [Bioperl-guts-l] [14915] bioperl-live/trunk/Bio/Graphics: apply HTML escaping to imagemap title attributes rather than URL escaping Message-ID: <200810031713.m93HDkkN007471@dev.open-bio.org> Revision: 14915 Author: lstein Date: 2008-10-03 13:13:44 -0400 (Fri, 03 Oct 2008) Log Message: ----------- apply HTML escaping to imagemap title attributes rather than URL escaping Modified Paths: -------------- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm bioperl-live/trunk/Bio/Graphics/Panel.pm Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-10-02 21:04:09 UTC (rev 14914) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-10-03 17:13:44 UTC (rev 14915) @@ -1563,7 +1563,7 @@ sub link_pattern { my $self = shift; - my ($linkrule,$feature,$panel) = @_; + my ($linkrule,$feature,$panel,$dont_escape) = @_; $panel ||= 'Bio::Graphics::Panel'; @@ -1574,12 +1574,14 @@ } require CGI unless defined &CGI::escape; + my $escape_method = $dont_escape ? sub {shift} : \&CGI::escape; + my $n; $linkrule ||= ''; # prevent uninit warning my $seq_id = $feature->can('seq_id') ? $feature->seq_id() : $feature->location->seq_id(); $seq_id ||= $feature->seq_id; #fallback - $linkrule =~ s/\$(\w+)/ - CGI::escape( + $linkrule =~ s!\$(\w+)! + $escape_method->( $1 eq 'ref' ? (($n = $seq_id) && "$n") || '' : $1 eq 'name' ? (($n = $feature->display_name) && "$n") || '' : $1 eq 'class' ? eval {$feature->class} || '' @@ -1596,7 +1598,7 @@ : $1 eq 'id' ? $feature->feature_id || '' : '$'.$1 ) - /exg; + !exg; return $linkrule; } @@ -1621,9 +1623,9 @@ for my $label ($self->feature2label($feature)) { my $linkrule = $self->setting($label,'title'); - $linkrule ||= $self->setting(general=>'title'); + $linkrule ||= $self->setting(general=>'title'); next unless $linkrule; - return $self->link_pattern($linkrule,$feature); + return $self->link_pattern($linkrule,$feature,undef,1); } my $method = eval {$feature->method} || $feature->primary_tag; Modified: bioperl-live/trunk/Bio/Graphics/Panel.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-02 21:04:09 UTC (rev 14914) +++ bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-03 17:13:44 UTC (rev 14915) @@ -1089,6 +1089,8 @@ my $boxes = $self->boxes; my (%track2link,%track2title,%track2target); + eval "require CGI" unless CGI->can('escapeHTML'); + my $map = qq(\n); foreach (@$boxes){ my ($feature,$left,$top,$right,$bottom,$track) = @$_; @@ -1105,11 +1107,11 @@ : $track2target{$track} ||= (defined $track->option('target')? $track->option('target') : $targetrule); my $href = $self->make_link($lr,$feature); - my $alt = $self->make_link($tr,$feature); - my $target = $self->make_link($tgr,$feature); - $alt = $self->make_title($feature) unless defined $alt; + my $title = CGI::escapeHTML($self->make_link($tr,$feature,1)); + my $target = CGI::escapeHTML($self->make_link($tgr,$feature,1)); - my $a = $alt ? qq(title="$alt" alt="$alt") : ''; + + my $a = $title ? qq(title="$title") : ''; my $t = $target ? qq(target="$target") : ''; $map .= qq(\n) if $href; } @@ -1119,10 +1121,10 @@ sub make_link { my $self = shift; - my ($linkrule,$feature) = @_; + my ($linkrule,$feature,$escapeHTML) = @_; eval "require Bio::Graphics::FeatureFile;1" unless Bio::Graphics::FeatureFile->can('link_pattern'); - return Bio::Graphics::FeatureFile->link_pattern($linkrule,$feature,$self); + return Bio::Graphics::FeatureFile->link_pattern($linkrule,$feature,$self,$escapeHTML); } sub make_title { @@ -2551,8 +2553,8 @@ HTML document like so: my ($url,$map,$mapname) = - $panel->image_and_map(-link=>'http://www.google.com/searche?q=$name'); - print qq(),"\n"; + $panel->image_and_map(-link=>'http://www.google.com/search?q=$name'); + print qq(),"\n"; print $map,"\n"; =item $url = $panel-Ecreate_web_image($url,$root) From bugzilla-daemon at portal.open-bio.org Fri Oct 3 14:20:16 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 3 Oct 2008 14:20:16 -0400 Subject: [Bioperl-guts-l] [Bug 2612] New: Warning message trying to parse xmfa file with AlignIO (all gap sequence present) Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2612 Summary: Warning message trying to parse xmfa file with AlignIO (all gap sequence present) Product: BioPerl Version: unspecified Platform: Macintosh URL: http://sial.org/pbot/32452 OS/Version: Mac OS Status: NEW Severity: normal Priority: P2 Component: Core Components AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: andrew.stewart at med.navy.mil The first next_aln run on the xmfa file (see URL) produces 6 instances of the following warning... -------------------- WARNING --------------------- MSG: Got a sequence with no letters in it cannot guess alphabet [] --------------------------------------------------- ...and then stops parsing. A subsequent next_aln will resume parsing through the xmfa file. Probable cause: 1st sequence of 1st alignment contains only gaps, making it impossible to determine alphabet -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Oct 6 22:24:15 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 6 Oct 2008 22:24:15 -0400 Subject: [Bioperl-guts-l] [Bug 2614] New: invoking "use Bio::Tools::dpAlign; " produced error Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2614 Summary: invoking "use Bio::Tools::dpAlign;" produced error Product: BioPerl Version: unspecified Platform: PC OS/Version: Linux Status: NEW Keywords: Bioperl Severity: blocker Priority: P2 Component: bioperl-ext AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: andreas.lehmann at fccc.edu When using directive "use Bio::Tools::dpAlign;" in the calling Perl script, the output from compiling this script fails: Too many arguments for Bio::Ext::Align::Align_Protein_Sequences at /usr/share/perl5/Bio/Tools/dpAlign.pm line 420, near "})" Too many arguments for Bio::Ext::Align::Align_Protein_Sequences at /usr/share/perl5/Bio/Tools/dpAlign.pm line 480, near "})" Compilation failed in require at ./seq_score_align.pl line 6. BEGIN failed--compilation aborted at ./seq_score_align.pl line 6. Looking at the call for Align_Protein_Sequences in module dpAlign.pm reveals 4 arguments being transfered to the subroutine at lines 420 and 480, respectively. However, looking at the (supposed) subroutine definition in /usr/local/share/perl/5.8.8/Bio/Ext/Align/Align.xs, line 3334 reveals that only 3 arguments are defined. Please advise if I overlooked some important point. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From heikki at dev.open-bio.org Tue Oct 7 11:14:26 2008 From: heikki at dev.open-bio.org (Heikki Lehvaslaiho) Date: Tue, 7 Oct 2008 11:14:26 -0400 Subject: [Bioperl-guts-l] [14916] bioperl-run/trunk: wrapper works now with PhyML 3. 0 while remaining compatible with 2.4.4 Message-ID: <200810071514.m97FEQXb000701@dev.open-bio.org> Revision: 14916 Author: heikki Date: 2008-10-07 11:14:25 -0400 (Tue, 07 Oct 2008) Log Message: ----------- wrapper works now with PhyML 3.0 while remaining compatible with 2.4.4 Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm bioperl-run/trunk/t/Phyml.t Modified: bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-10-03 17:13:44 UTC (rev 14915) +++ bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-10-07 15:14:25 UTC (rev 14916) @@ -1,4 +1,3 @@ -# $Id: QuickTree.pm 13928 2007-06-14 15:23:09Z sendu $ # # BioPerl module for Bio::Tools::Run::Phylo::Phyml # @@ -50,7 +49,7 @@ =head1 DESCRIPTION -This is a wrapper for running the phyml application by St?phane +This is a wrapper for running the phyml application by Stephane Guindon and Olivier Gascuel. You can download it from: http://atgc.lirmm.fr/phyml/ @@ -64,12 +63,12 @@ =over -=item 1. +=item 1. Make sure the Phyml executable is in your path. Copy it to, or create a symbolic link from a directory that is in your path. -=item 2. +=item 2. Define an environmental variable PHYMLDIR which is a directory which contains the 'phyml' application: In bash: @@ -80,7 +79,7 @@ setenv PHYMLDIR /home/username/phyml_v2.4.4/exe -=item 3. +=item 3. Include a definition of an environmental variable PHYMLDIR in every script that will use this Phyml wrapper module, e.g.: @@ -92,7 +91,7 @@ =head2 Running -This wrapper has been tested with PHYML v2.4.4. +This wrapper has been tested with PHYML v2.4.4 and v.3.0 In its current state, the wrapper supports only input of one MSA and output of one tree. It can easily be extended to support more advanced @@ -156,6 +155,12 @@ # protein map { $models->{1}->{$_} = 1 } qw(JTT MtREV Dayhoff WAG); +our $models3; +# DNA +map { $models3->{'nt'}->{$_} = 1 } qw(HKY85 JC69 K80 F81 F84 TN93 GTR ); +# protein +map { $models3->{'aa'}->{$_} = 1 } + qw(WAG JTT MtREV Dayhoff DCMut RtREV CpREV VT Blosum62 MtMam MtArt HIVw HIVb ); =head2 program_name @@ -198,7 +203,7 @@ -kappa => 'e' or float, [e] -invar => 'e' or float, [e] -category_number => integer, [1] - -alpha => 'e' or float, [e] + -alpha => 'e' or float (int v3),[e] -tree => 'BIONJ' or your own, [BION] -opt_topology => boolean [y] -opt_lengths => boolean [y] @@ -208,7 +213,7 @@ sub new { my ($class, @args) = @_; my $self = $class->SUPER::new(@args); - + # for consistency with other run modules, allow params to be dashless my %args = @args; while (my ($key, $val) = each %args) { @@ -217,58 +222,108 @@ $args{'-'.$key} = $val; } } - - my ($data_type, $data_format, $dataset_count, $model, $kappa, $invar, + + my ($data_type, $data_format, $dataset_count, $model, $freq, $kappa, $invar, $category_number, $alpha, $tree, $opt_topology, - $opt_lengths) = $self->_rearrange([qw( DATA_TYPE - DATA_FORMAT - DATASET_COUNT - MODEL - KAPPA - INVAR - CATEGORY_NUMBER - ALPHA - TREE - OPT_TOPOLOGY - OPT_LENGTHS)], %args); + $opt_lengths, $opt, $search, $rand_start, $rand_starts, $rand_seed) + = $self->_rearrange([qw( DATA_TYPE + DATA_FORMAT + DATASET_COUNT + MODEL + FREQ + KAPPA + INVAR + CATEGORY_NUMBER + ALPHA + TREE + OPT_TOPOLOGY + OPT_LENGTHS + OPT + SEARCH + RAND_START + RAND_STARTS + RAND_SEED + )], %args); $self->data_type($data_type) if $data_type; $self->data_format($data_format) if $data_format; $self->dataset_count($dataset_count) if $dataset_count; $self->model($model) if $model; + $self->freq($kappa) if $freq; $self->kappa($kappa) if $kappa; $self->invar($invar) if $invar; $self->category_number($category_number) if $category_number; $self->alpha($alpha) if $alpha; $self->tree($tree) if $tree; $self->opt_topology($opt_topology) if $opt_topology; - $self->opt_lengths ($opt_lengths) if $opt_lengths; - + $self->opt_lengths($opt_lengths) if $opt_lengths; + $self->opt($opt) if $opt; + $self->search($search) if $search; + $self->rand_start($rand_start) if $rand_start; + $self->rand_starts($rand_starts) if $rand_starts; + $self->rand_seed($rand_seed) if $rand_seed; + + return $self; } +=head2 version + Title : version + Usage : exit if $prog->version < 1.8 + Function: Determine the version number of the program + Example : + Returns : float or undef + Args : none + +Phyml before 3.0 did not display the version. Assume 2.44. + +=cut + +sub version { + my $self = shift; + + return $self->{'_version'} if defined $self->{'_version'}; + my $exe = $self->executable || return; + my $string = substr `$exe --help`, 0, 40 ; + my ($version) = $string =~ /PhyML v([\d+\.]+)/; + $self->{'_version'} = $version; + $version ? (return $version) : return '2.44' +} + + =head2 data_type Title : data_type - Usage : $phyml->data_type('dna'); - Function: Sets sequence alphabet to 'dna' or 'protein' + Usage : $phyml->data_type('nt'); + Function: Sets sequence alphabet to 'dna' (nt in v3) or 'aa' If leaved unset, will be set automatically Returns : set value, defaults to 'protein' - Args : None to get, 'dna' or 'protein' to set. + Args : None to get, 'dna' ('nt') or 'aa' to set. =cut sub data_type { my ($self, $value) = @_; - if (defined $value) { - if ($value eq 'dna') { - $self->{_data_type} = '0'; - } else { - $self->{_data_type} = '1'; + if ($self->version >= 3 ) { + if (defined $value) { + if ($value eq 'nt') { + $self->{_data_type} = 'nt'; + } else { + $self->{_data_type} = 'aa'; + } } + return 'aa' unless defined $self->{_data_type}; + } else { + if (defined $value) { + if ($value eq 'dna') { + $self->{_data_type} = '0'; + } else { + $self->{_data_type} = '1'; + } + } + return '1' unless defined $self->{_data_type}; } - return '1' unless defined $self->{_data_type}; return $self->{_data_type}; } @@ -276,7 +331,7 @@ =head2 data_format Title : data_format - Usage : $phyml->data_format('dna'); + Usage : $phyml->data_format('s'); Function: Sets PHYLIP format to 'i' interleaved or 's' sequential Returns : set value, defaults to 'i' @@ -320,11 +375,18 @@ Title : model Usage : $phyml->model('HKY'); - Function: Choose the substitution model to use. One of + Function: Choose the substitution model to use. One of JC69 | K2P | F81 | HKY | F84 | TN93 | GTR (DNA) - JTT | MtREV | Dayhoff | WAG (Amino-Acids) + JTT | MtREV | Dayhoff | WAG (amino acids) + v3.0: + HKY85 (default) | JC69 | K80 | F81 | F84 | + TN93 | GTR (DNA) + WAG (default) | JTT | MtREV | Dayhoff | DCMut | + RtREV | CpREV | VT | Blosum62 | MtMam | MtArt | + HIVw | HIVb (amino acids) + Returns : Name of the model, defaults to {HKY|JTT} Args : None to get, string to set. @@ -333,22 +395,63 @@ sub model { my ($self, $value) = @_; if (defined ($value)) { - $self->throw("Not a valid model name [$value] for current data type (alphabet)") + if ($self->version >= 3 ) { + unless ($value =~ /\d{6}/) { + $self->throw("Not a valid model name [$value] for current data type (alphabet)") + unless $models3->{$self->data_type}->{$value}; + } + } else { + $self->throw("Not a valid model name [$value] for current data type (alphabet)") unless $models->{$self->data_type}->{$value}; - $self->{_model} = $value; + } + $self->{_model} = $value; } + if ($self->{_model}) { return $self->{_model}; } - elsif ($self->data_type) { - return 'JTT'; # protein + + if ($self->version >= 3 ) { + if ($self->data_type eq 'aa') { + return 'WAG'; # protein + } else { + return 'HKY85'; # DNA + } } else { - return 'HKY'; # DNA + if ($self->data_type) { + return 'JTT'; # protein + } else { + return 'HKY'; # DNA + } } } +=head2 freq + Title : freq + Usage : $phyml->freq(e); $phyml->freq("0.2, 0.6, 0.6, 0.2"); + Function: Sets nucleotide frequences or asks residue to be estimated + according to two models: e or d + Returns : set value, + Args : None to get, string to set. + +v3 only. + +=cut + +sub freq { + my ($self, $value) = @_; + $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; + if (defined $value) { + die "Invalid value [$value]" + unless $value =~ /^[\d\. ]$/ or $value eq 'e' or $value eq 'd'; + $self->{_freq} = $value; + } + return $self->{_freq}; +} + + =head2 kappa Title : kappa @@ -395,14 +498,13 @@ } - =head2 category_number Title : category_number Usage : $phyml->category_number(4); Function: Sets number of relative substitution rate categories Returns : set value, defaults to 1 - Args : None to get, float or integer to set. + Args : None to get, integer to set. =cut @@ -431,7 +533,7 @@ sub alpha { my ($self, $value) = @_; if (defined $value) { - die "Invalid number [$value]" + die "Invalid number [$value]" unless $value =~ /^[-+]?\d*\.?\d*$/ or $value eq 'e'; $self->{_alpha} = $value; } @@ -446,7 +548,7 @@ Usage : $phyml->tree('/tmp/tree.nwk'); Function: Sets starting tree, leave unset to estimate a distance tree Returns : set value, defaults to 'BIONJ' - Args : None to get, float or integer to set. + Args : None to get, newick tree file name to set. =cut @@ -471,18 +573,21 @@ Returns : {y|n} (default y) Args : None to get, boolean to set. +v2.* only + =cut sub opt_topology { my ($self, $value) = @_; + $self->throw("Not a valid parameter for to PhyML v3") if $self->version >= 3; @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Tue Oct 7 12:38:44 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 7 Oct 2008 12:38:44 -0400 Subject: [Bioperl-guts-l] [Bug 2614] invoking "use Bio::Tools::dpAlign; " produced error In-Reply-To: Message-ID: <200810071638.m97GciFH013929@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2614 ------- Comment #1 from cjfields at bioperl.org 2008-10-07 12:38 EST ------- Unfortunately I'm unable to reproduce this (I get no error) when using code from svn. I recommend updating to the svn versions for both bioperl and bioperl-ext to ensure you are getting the latest code. There have been a few changes over the last year to dpAlign and the bioperl-ext modules which may be precipitating the error messages if you are mixing the bioperl 1.5.2 release with svn bioperl-ext (or vice versa). -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Oct 7 12:49:21 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 7 Oct 2008 12:49:21 -0400 Subject: [Bioperl-guts-l] [Bug 2612] Warning message trying to parse xmfa file with AlignIO (all gap sequence present) In-Reply-To: Message-ID: <200810071649.m97GnLZL014465@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2612 ------- Comment #1 from cjfields at bioperl.org 2008-10-07 12:49 EST ------- Created an attachment (id=1003) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1003&action=view) test xmfa file (Mauve, I'm guessing) Attaching test file so we don't lose it to nopaste deletion -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Tue Oct 7 13:52:51 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Tue, 7 Oct 2008 13:52:51 -0400 Subject: [Bioperl-guts-l] [14917] bioperl-live/trunk/Bio: [bug 2612] Message-ID: <200810071752.m97HqpZI003225@dev.open-bio.org> Revision: 14917 Author: cjfields Date: 2008-10-07 13:52:50 -0400 (Tue, 07 Oct 2008) Log Message: ----------- [bug 2612] Allow all-gap sequences; if start == 0, assume sequence is blank and end no longer needs adjustment Modified Paths: -------------- bioperl-live/trunk/Bio/AlignIO/xmfa.pm bioperl-live/trunk/Bio/LocatableSeq.pm Modified: bioperl-live/trunk/Bio/AlignIO/xmfa.pm =================================================================== --- bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-10-07 15:14:25 UTC (rev 14916) +++ bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-10-07 17:52:50 UTC (rev 14917) @@ -28,7 +28,8 @@ =head1 TODO -Finish write_aln(), clean up code +Finish write_aln(), clean up code, allow LargeLocatableSeq (ie for +very large sequences a'la Mauve) =head1 FEEDBACK @@ -92,6 +93,8 @@ if ($aln && $entry =~ m{score\s*=\s*(\d+)}) { $aln->score($1); } + $seqchar = ''; + $name = $entry; last; } elsif ( $entry =~ m{^>.+$}xms) { if ( defined $name ) { @@ -104,9 +107,13 @@ $seqchar .= $entry; } } - - return $aln if $aln->no_sequences; - return; + + # this catches last sequence if '=' is not present (Mauve) + if ( defined $name ) { + my $seq = $self->_process_seq($name, $seqchar); + $aln->add_seq($seq); + } + $aln->no_sequences ? return $aln : return; } =head2 write_aln @@ -212,6 +219,7 @@ $self->throw("Line does not comform to XMFA format:\n$entry"); } my $seqobj = Bio::LocatableSeq->new( + -nowarnonempty => 1, -strand => $strand, -seq => $seq, -display_id => $seqname, Modified: bioperl-live/trunk/Bio/LocatableSeq.pm =================================================================== --- bioperl-live/trunk/Bio/LocatableSeq.pm 2008-10-07 15:14:25 UTC (rev 14916) +++ bioperl-live/trunk/Bio/LocatableSeq.pm 2008-10-07 17:52:50 UTC (rev 14917) @@ -87,6 +87,9 @@ use Bio::Location::Fuzzy; use vars qw($GAP_SYMBOLS $OTHER_SYMBOLS $MATCHPATTERN); +# should we change these to non-globals? (I can see this +# causing problems down the road...) - cjfields + $GAP_SYMBOLS = '\-\.=~'; $OTHER_SYMBOLS = '\*\?'; @@ -99,13 +102,14 @@ my ($class, @args) = @_; my $self = $class->SUPER::new(@args); - my ($start,$end,$strand) = - $self->_rearrange( [qw(START END STRAND)], + my ($start,$end,$strand, $tuple) = + $self->_rearrange( [qw(START END STRAND TUPLE)], @args); defined $start && $self->start($start); defined $end && $self->end($end); defined $strand && $self->strand($strand); + defined $tuple && $self->tuple($tuple); return $self; # success - we hope! } @@ -148,10 +152,10 @@ if( @_ ) { my $value = shift; my $string = $self->seq; - if ($self->seq) { + # start of 0 usually means the sequence is blank (all gaps) + if ($self->seq && $self->start != 0 ) { my $len = $self->_ungapped_len; my $id = $self->id; - # changed 9/14/08 if ($len != $value) { $self->warn("In sequence $id residue count gives end value ". @@ -242,7 +246,7 @@ my ($seq, $count) = (undef, 0); # default gap characters - $char ||= '-.'; + $char ||= $GAP_SYMBOLS; $self->warn("I hope you know what you are doing setting gap to [$char]") unless $char =~ /[-.]/; From cjfields at dev.open-bio.org Tue Oct 7 14:38:23 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Tue, 7 Oct 2008 14:38:23 -0400 Subject: [Bioperl-guts-l] [14918] bioperl-live/trunk/Bio/AlignIO/xmfa.pm: [bug 2612] Message-ID: <200810071838.m97IcNTi003308@dev.open-bio.org> Revision: 14918 Author: cjfields Date: 2008-10-07 14:38:23 -0400 (Tue, 07 Oct 2008) Log Message: ----------- [bug 2612] * undef the name, don't carry it over Modified Paths: -------------- bioperl-live/trunk/Bio/AlignIO/xmfa.pm Modified: bioperl-live/trunk/Bio/AlignIO/xmfa.pm =================================================================== --- bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-10-07 17:52:50 UTC (rev 14917) +++ bioperl-live/trunk/Bio/AlignIO/xmfa.pm 2008-10-07 18:38:23 UTC (rev 14918) @@ -94,7 +94,7 @@ $aln->score($1); } $seqchar = ''; - $name = $entry; + undef $name; last; } elsif ( $entry =~ m{^>.+$}xms) { if ( defined $name ) { From bugzilla-daemon at portal.open-bio.org Tue Oct 7 14:43:25 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 7 Oct 2008 14:43:25 -0400 Subject: [Bioperl-guts-l] [Bug 2612] Warning message trying to parse xmfa file with AlignIO (all gap sequence present) In-Reply-To: Message-ID: <200810071843.m97IhP07009398@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2612 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #2 from cjfields at bioperl.org 2008-10-07 14:43 EST ------- Committed a preliminary fix to subversion. It should now allow all gap sequences (essentially blank sequences) and also bypass end setting if the start == 0. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Tue Oct 7 18:06:14 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Tue, 7 Oct 2008 18:06:14 -0400 Subject: [Bioperl-guts-l] [14919] bioperl-live/trunk/t/Handler.t: TODO's work Message-ID: <200810072206.m97M6ETg003582@dev.open-bio.org> Revision: 14919 Author: cjfields Date: 2008-10-07 18:06:14 -0400 (Tue, 07 Oct 2008) Log Message: ----------- TODO's work Modified Paths: -------------- bioperl-live/trunk/t/Handler.t Modified: bioperl-live/trunk/t/Handler.t =================================================================== --- bioperl-live/trunk/t/Handler.t 2008-10-07 18:38:23 UTC (rev 14918) +++ bioperl-live/trunk/t/Handler.t 2008-10-07 22:06:14 UTC (rev 14919) @@ -732,28 +732,17 @@ # version, seq_update, dates (5 tests) is($seq->version, 40); my ($ann) = $seq->annotation->get_Annotations('seq_update'); -TODO: { - local $TODO = 'grabbing seq_update with old SwissProt seqs now failing'; - eval {is($ann->display_text, 35,'operator overloading in AnnotationI is deprecated')}; - ok(!$@); -} +eval {is($ann->display_text, 35,'operator overloading in AnnotationI is deprecated')}; +ok(!$@); my @dates = $seq->get_dates; my @date_check = qw(01-NOV-1997 01-NOV-1997 16-OCT-2001); for my $date (@dates) { my $expdate = shift @date_check; - if ($expdate) { - is($date, $expdate,'dates'); - } else { - TODO: { - local $TODO = 'grabbing all dates with old SwissProt seqs now failing'; - is($date, $expdate); - } - } + is($date, $expdate,'dates'); } - my @gns2 = $seq->annotation->get_Annotations('gene_name'); # check gene name is preserved (was losing suffix in worm gene names) ok($#gns2 == 0 && $gns[0]->value eq $gns2[0]->value); From cjfields at dev.open-bio.org Tue Oct 7 18:06:41 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Tue, 7 Oct 2008 18:06:41 -0400 Subject: [Bioperl-guts-l] [14920] bioperl-live/trunk/Bio/Seq/LargeSeqI.pm: add svn tag Message-ID: <200810072206.m97M6fkQ003610@dev.open-bio.org> Revision: 14920 Author: cjfields Date: 2008-10-07 18:06:40 -0400 (Tue, 07 Oct 2008) Log Message: ----------- add svn tag Modified Paths: -------------- bioperl-live/trunk/Bio/Seq/LargeSeqI.pm Modified: bioperl-live/trunk/Bio/Seq/LargeSeqI.pm =================================================================== --- bioperl-live/trunk/Bio/Seq/LargeSeqI.pm 2008-10-07 22:06:14 UTC (rev 14919) +++ bioperl-live/trunk/Bio/Seq/LargeSeqI.pm 2008-10-07 22:06:40 UTC (rev 14920) @@ -1,4 +1,4 @@ -# $Id $ +# $Id$ # # BioPerl module for Bio::Seq::LargeSeqI # From heikki at dev.open-bio.org Wed Oct 8 09:12:31 2008 From: heikki at dev.open-bio.org (Heikki Lehvaslaiho) Date: Wed, 8 Oct 2008 09:12:31 -0400 Subject: [Bioperl-guts-l] [14921] bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm: documentation and reporting fixes, method reorganisation Message-ID: <200810081312.m98DCVO1005263@dev.open-bio.org> Revision: 14921 Author: heikki Date: 2008-10-08 09:12:30 -0400 (Wed, 08 Oct 2008) Log Message: ----------- documentation and reporting fixes, method reorganisation Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm Modified: bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-10-07 22:06:40 UTC (rev 14920) +++ bioperl-run/trunk/Bio/Tools/Run/Phylo/Phyml.pm 2008-10-08 13:12:30 UTC (rev 14921) @@ -162,34 +162,6 @@ map { $models3->{'aa'}->{$_} = 1 } qw(WAG JTT MtREV Dayhoff DCMut RtREV CpREV VT Blosum62 MtMam MtArt HIVw HIVb ); -=head2 program_name - - Title : program_name - Usage : $factory>program_name() - Function: holds the program name - Returns : string - Args : None - -=cut - -sub program_name { - return $PROGRAM_NAME; -} - -=head2 program_dir - - Title : program_dir - Usage : $factory->program_dir(@params) - Function: returns the program directory, obtiained from ENV variable. - Returns : string - Args : None - -=cut - -sub program_dir { - return $PROGRAM_DIR; -} - =head2 new Title : new @@ -267,6 +239,34 @@ return $self; } +=head2 program_name + + Title : program_name + Usage : $factory>program_name() + Function: holds the program name + Returns : string + Args : None + +=cut + +sub program_name { + return $PROGRAM_NAME; +} + +=head2 program_dir + + Title : program_dir + Usage : $factory->program_dir(@params) + Function: returns the program directory, obtiained from ENV variable. + Returns : string + Args : None + +=cut + +sub program_dir { + return $PROGRAM_DIR; +} + =head2 version Title : version @@ -276,7 +276,8 @@ Returns : float or undef Args : none -Phyml before 3.0 did not display the version. Assume 2.44. +Phyml before 3.0 did not display the version. Assume 2.44 when can not +determine it. =cut @@ -292,6 +293,73 @@ } +=head2 run + + Title : run + Usage : $factory->run($aln_file); + $factory->run($align_object); + Function: Runs Phyml to generate a tree + Returns : Bio::Tree::Tree object + Args : file name for your input alignment in a format + recognised by AlignIO, OR Bio::Align::AlignI + complient object (eg. Bio::SimpleAlign). + +=cut + +sub run { + my ($self, $in) = @_; + + if (ref $in && $in->isa("Bio::Align::AlignI")) { + $in = $self->_write_phylip_align_file($in); + } + elsif (! -e $in) { + $self->throw("When not supplying a Bio::Align::AlignI object, ". + "you must supply a readable filename"); + } + elsif (-e $in) { + copy ($in, $self->tempdir); + my $name = File::Spec->splitpath($in); # name is the last item in the array + $in = File::Spec->catfile($self->tempdir, $name); + } + + return $self->_run($in); +} + +=head2 stats + + Title : stats + Usage : $factory->stats; + Function: Returns the contents of the phyml '_phyml_stat.txt' output file + Returns : string with statistics about the run, undef before run() + Args : none + +=cut + +sub stats { + my $self = shift;; + return $self->{_stats}; +} + +=head2 tree_string + + Title : tree_string + Usage : $factory->tree_string; + $factory->run($align_object); + Function: Returns the contents of the phyml '_phyml_tree.txt' ouput file + Returns : string with tree in Newick format, undef before run() + Args : none + +=cut + +sub tree_string { + my $self = shift;; + return $self->{_tree}; +} + +=head2 Getsetters + +These methods are used to set and get program parameters before running. + =head2 data_type Title : data_type @@ -427,31 +495,7 @@ } } -=head2 freq - Title : freq - Usage : $phyml->freq(e); $phyml->freq("0.2, 0.6, 0.6, 0.2"); - Function: Sets nucleotide frequences or asks residue to be estimated - according to two models: e or d - Returns : set value, - Args : None to get, string to set. - -v3 only. - -=cut - -sub freq { - my ($self, $value) = @_; - $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; - if (defined $value) { - die "Invalid value [$value]" - unless $value =~ /^[\d\. ]$/ or $value eq 'e' or $value eq 'd'; - $self->{_freq} = $value; - } - return $self->{_freq}; -} - - =head2 kappa Title : kappa @@ -563,7 +607,9 @@ return $self->{_tree} || 'BIONJ'; } +=head2 v2 options +These methods can be used with PhyML v2* only. =head2 opt_topology @@ -579,7 +625,7 @@ sub opt_topology { my ($self, $value) = @_; - $self->throw("Not a valid parameter for to PhyML v3") if $self->version >= 3; + $self->throw("Not a valid parameter [opt_topology] for to PhyML v3") if $self->version >= 3; if (defined ($value)) { if ($value) { $self->{_opt_topology} = 'y'; @@ -605,7 +651,7 @@ sub opt_lengths { my ($self, $value) = @_; - $self->throw("Not a valid parameter for PhyML v3") if $self->version >= 3; + $self->throw("Not a valid parameter [opt_lengths] for PhyML v3") if $self->version >= 3; if (defined ($value)) { if ($value) { $self->{_opt_lengths} = 'y'; @@ -616,6 +662,34 @@ return $self->{_opt_lengths} || 'y'; } +=head2 v3 options + +These methods can be used with PhyML v3* only. + +=head2 freq + + Title : freq + Usage : $phyml->freq(e); $phyml->freq("0.2, 0.6, 0.6, 0.2"); + Function: Sets nucleotide frequences or asks residue to be estimated + according to two models: e or d + Returns : set value, + Args : None to get, string to set. + +v3 only. + +=cut + +sub freq { + my ($self, $value) = @_; + $self->throw("Not a valid parameter [freq] prior to PhyML v3") if $self->version < 3; + if (defined $value) { + die "Invalid value [$value]" + unless $value =~ /^[\d\. ]$/ or $value eq 'e' or $value eq 'd'; + $self->{_freq} = $value; + } + return $self->{_freq}; +} + =head2 opt Title : opt @@ -630,7 +704,7 @@ sub opt { my ($self, $value) = @_; - $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; + $self->throw("Not a valid parameter [opt] prior to PhyML v3") if $self->version < 3; if (defined ($value)) { $self->{_opt} = $value if $value =~ /tlr|tl|tr|l|n/; } @@ -651,7 +725,7 @@ sub search { my ($self, $value) = @_; - $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; + $self->throw("Not a valid parameter [search] prior to PhyML v3") if $self->version < 3; if (defined ($value)) { $self->{_search} = $value if $value =~ /NNI|SPR|BEST/; } @@ -666,14 +740,14 @@ Returns : boolean (defaults to false) Args : None to get, boolean to set. -v3.* only; only meaningful if $prog->search is 'SPR' +v3.* only; only meaningful if $prog-Esearch is 'SPR' =cut sub rand_start { my ($self, $value) = @_; - $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; + $self->throw("Not a valid parameter [rand_start] prior to PhyML v3") if $self->version < 3; if (defined ($value)) { if ($value) { $self->{_rand_start} = 1; @@ -693,13 +767,13 @@ Returns : integer (defaults to 1) Args : None to get, integer to set. -v3.* only; only valid if $prog->search is 'SPR' +v3.* only; only valid if $prog-Esearch is 'SPR' =cut sub rand_starts { my ($self, $value) = @_; - $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; + $self->throw("Not a valid parameter [rand_starts] prior to PhyML v3") if $self->version < 3; if (defined $value) { die "Invalid number [$value]" unless $value =~ /^[-+]?\d+$/; @@ -717,15 +791,15 @@ Returns : random integer Args : None to get, integer to set. -v3.* only; only valid if $prog->search is 'SPR' +v3.* only; only valid if $prog-Esearch is 'SPR' -Uses perl rand() to initialize if not explicitely set +Uses perl rand() to initialize if not explicitely set. =cut sub rand_seed { my ($self, $value) = @_; - $self->throw("Not a valid parameter prior to PhyML v3") if $self->version < 3; + $self->throw("Not a valid parameter [rand_seed] prior to PhyML v3") if $self->version < 3; if (defined $value) { die "Invalid number [$value]" unless $value =~ /^[-+]?\d+$/; @@ -735,71 +809,12 @@ } +=head2 Internal methods +These methods are private and should not be called outside this class. -=head2 run - - Title : run - Usage : $factory->run($aln_file); - $factory->run($align_object); - Function: Runs Phyml to generate a tree - Returns : Bio::Tree::Tree object - Args : file name for your input alignment in a format - recognised by AlignIO, OR Bio::Align::AlignI - complient object (eg. Bio::SimpleAlign). - =cut -sub run { - my ($self, $in) = @_; - - if (ref $in && $in->isa("Bio::Align::AlignI")) { - $in = $self->_write_phylip_align_file($in); - } - elsif (! -e $in) { - $self->throw("When not supplying a Bio::Align::AlignI object, ". - "you must supply a readable filename"); - } - elsif (-e $in) { - copy ($in, $self->tempdir); - my $name = File::Spec->splitpath($in); # name is the last item in the array - $in = File::Spec->catfile($self->tempdir, $name); - } - - return $self->_run($in); -} - -=head2 stats - - Title : stats - Usage : $factory->stats; - Function: Returns the contents of the phyml '_phyml_stat.txt' output file - Returns : string with statistics about the run, undef before run() - Args : none - -=cut - -sub stats { - my $self = shift;; - return $self->{_stats}; -} - -=head2 tree_string - - Title : tree_string - Usage : $factory->tree_string; - $factory->run($align_object); - Function: Returns the contents of the phyml '_phyml_tree.txt' ouput file - Returns : string with tree in Newick format, undef before run() - Args : none - -=cut - -sub tree_string { - my $self = shift;; - return $self->{_tree}; -} - sub _run { my ($self, $file)= @_; From lstein at dev.open-bio.org Wed Oct 8 13:07:31 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Wed, 8 Oct 2008 13:07:31 -0400 Subject: [Bioperl-guts-l] [14922] bioperl-live/trunk/Bio/Graphics/FeatureFile.pm: try to distinguish between #aabbcc colors and right-hand comments Message-ID: <200810081707.m98H7VxC005738@dev.open-bio.org> Revision: 14922 Author: lstein Date: 2008-10-08 13:07:31 -0400 (Wed, 08 Oct 2008) Log Message: ----------- try to distinguish between #aabbcc colors and right-hand comments Modified Paths: -------------- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-10-08 13:12:30 UTC (rev 14921) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-10-08 17:07:31 UTC (rev 14922) @@ -559,7 +559,7 @@ my $self = shift; local $_ = shift; - s/\s+\#.*$//; # strip right-column comments + s/\#.*$// unless /\#[0-9a-f]{6}\s*$/i; # strip right-column comments unless they look like colors if (/^\s+(.+)/ && $self->{current_tag}) { # configuration continuation line my $value = $1; From lstein at dev.open-bio.org Wed Oct 8 18:10:40 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Wed, 8 Oct 2008 18:10:40 -0400 Subject: [Bioperl-guts-l] [14923] bioperl-live/trunk/Bio/Graphics/Panel.pm: added a workaround that prevents panel from crashing when trying to draw an image with no tracks in it Message-ID: <200810082210.m98MAerS006261@dev.open-bio.org> Revision: 14923 Author: lstein Date: 2008-10-08 18:10:40 -0400 (Wed, 08 Oct 2008) Log Message: ----------- added a workaround that prevents panel from crashing when trying to draw an image with no tracks in it Modified Paths: -------------- bioperl-live/trunk/Bio/Graphics/Panel.pm Modified: bioperl-live/trunk/Bio/Graphics/Panel.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-08 17:07:31 UTC (rev 14922) +++ bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-08 22:10:40 UTC (rev 14923) @@ -500,6 +500,11 @@ my $width = $self->width + $self->pad_left + $self->pad_right; my $pkg = $self->image_package; + + $height = 12 if $height < 1; # so GD doesn't crash + $width = 1 if $width < 1; # ditto + warn "$pkg->new($width,$height)"; + my $gd = $existing_gd || $pkg->new($width,$height, ($self->{truecolor} && $pkg->can('isTrueColor') ? 1 : ()) ); From lstein at dev.open-bio.org Wed Oct 8 18:11:29 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Wed, 8 Oct 2008 18:11:29 -0400 Subject: [Bioperl-guts-l] [14924] bioperl-live/trunk/Bio/Graphics/Panel.pm: removed stupid debugging statement Message-ID: <200810082211.m98MBTxQ006281@dev.open-bio.org> Revision: 14924 Author: lstein Date: 2008-10-08 18:11:29 -0400 (Wed, 08 Oct 2008) Log Message: ----------- removed stupid debugging statement Modified Paths: -------------- bioperl-live/trunk/Bio/Graphics/Panel.pm Modified: bioperl-live/trunk/Bio/Graphics/Panel.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-08 22:10:40 UTC (rev 14923) +++ bioperl-live/trunk/Bio/Graphics/Panel.pm 2008-10-08 22:11:29 UTC (rev 14924) @@ -503,7 +503,6 @@ $height = 12 if $height < 1; # so GD doesn't crash $width = 1 if $width < 1; # ditto - warn "$pkg->new($width,$height)"; my $gd = $existing_gd || $pkg->new($width,$height, ($self->{truecolor} && $pkg->can('isTrueColor') ? 1 : ()) From heikki at dev.open-bio.org Thu Oct 9 04:20:01 2008 From: heikki at dev.open-bio.org (Heikki Lehvaslaiho) Date: Thu, 9 Oct 2008 04:20:01 -0400 Subject: [Bioperl-guts-l] [14925] bioperl-live/trunk: fixed ps() output; spotted by Mira Han Message-ID: <200810090820.m998K1AY009486@dev.open-bio.org> Revision: 14925 Author: heikki Date: 2008-10-09 04:20:00 -0400 (Thu, 09 Oct 2008) Log Message: ----------- fixed ps() output; spotted by Mira Han Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/Statistics.pm bioperl-live/trunk/t/TreeStatistics.t Modified: bioperl-live/trunk/Bio/Tree/Statistics.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/Statistics.pm 2008-10-08 22:11:29 UTC (rev 14924) +++ bioperl-live/trunk/Bio/Tree/Statistics.pm 2008-10-09 08:20:00 UTC (rev 14925) @@ -210,7 +210,7 @@ if ($node->is_Leaf) { $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key") unless $node->has_tag($key); - $node->add_tag_value('ps_trait', $node->get_tag_values($key) ); + $node->set_tag_value('ps_trait', $node->get_tag_values($key) ); return; # end of recursion } @@ -231,10 +231,10 @@ } if (keys %intersection) { - map {$node->set_tag_value('ps_trait', $_)} keys %intersection; - $node->add_tag_value('ps_score', $score); + $node->set_tag_value('ps_trait', keys %intersection); + $node->set_tag_value('ps_score', $score); } else { - map {$node->set_tag_value('ps_trait', $_)} keys %union; + $node->set_tag_value('ps_trait',keys %union); $node->set_tag_value('ps_score', $score+1); } Modified: bioperl-live/trunk/t/TreeStatistics.t =================================================================== --- bioperl-live/trunk/t/TreeStatistics.t 2008-10-08 22:11:29 UTC (rev 14924) +++ bioperl-live/trunk/t/TreeStatistics.t 2008-10-09 08:20:00 UTC (rev 14925) @@ -29,7 +29,7 @@ my $key = $tree->add_trait(test_input_file('traits.tab'), 3); is ($key, 'intermediate', 'read traits'); -is $stats->ps($tree, $key), 5, 'parsimony score'; +is $stats->ps($tree, $key), 4, 'parsimony score'; is $stats->ps($tree, $key, $node), 1, 'subtree parsimony score'; is $stats->ai($tree, $key), 0.628906, 'association index'; From lstein at dev.open-bio.org Thu Oct 9 05:48:26 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Thu, 9 Oct 2008 05:48:26 -0400 Subject: [Bioperl-guts-l] [14926] bioperl-live/trunk/Bio/DB/GFF/Adaptor/memory.pm: fixed broken primary_id generation in memory adaptor Message-ID: <200810090948.m999mQtB009611@dev.open-bio.org> Revision: 14926 Author: lstein Date: 2008-10-09 05:48:26 -0400 (Thu, 09 Oct 2008) Log Message: ----------- fixed broken primary_id generation in memory adaptor Modified Paths: -------------- bioperl-live/trunk/Bio/DB/GFF/Adaptor/memory.pm Modified: bioperl-live/trunk/Bio/DB/GFF/Adaptor/memory.pm =================================================================== --- bioperl-live/trunk/Bio/DB/GFF/Adaptor/memory.pm 2008-10-09 08:20:00 UTC (rev 14925) +++ bioperl-live/trunk/Bio/DB/GFF/Adaptor/memory.pm 2008-10-09 09:48:26 UTC (rev 14926) @@ -176,7 +176,7 @@ my $self = shift; my $idx = 0; foreach my $arrayref (values %{$self->{tmp}}) { - foreach (@$arrayref) {$_->{primary_id} = $idx++; } + foreach (@$arrayref) {$_->{feature_id} = $idx++; } push @{$self->{data}},@$arrayref; } 1; @@ -611,7 +611,7 @@ my @found_features; my $data = $self->{data}; - my $feature_id = -1 ; + my $feature_id = -1 ; my $feature_group_id = undef; for my $feature (@{$data}) { @@ -659,7 +659,7 @@ # of found features and continue. my $found_feature = $feature ; - $found_feature->{primary_id} = $feature_id; + $found_feature->{feature_id} = $feature_id; $found_feature->{group_id} = $feature_group_id; push @found_features,$found_feature; } From bugzilla-daemon at portal.open-bio.org Thu Oct 9 18:17:43 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Oct 2008 18:17:43 -0400 Subject: [Bioperl-guts-l] [Bug 2615] New: Argument inconsistency in Bio::SearchIO Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2615 Summary: Argument inconsistency in Bio::SearchIO Product: BioPerl Version: 1.5 branch Platform: PC OS/Version: All Status: NEW Severity: normal Priority: P2 Component: Bio::Search/Bio::SearchIO AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: fossandon at vtr.net I made a script where, depending on the given arguments, it would parse Blast or Hmmer file types like this: my $format = 'hmmer'; # or 'blast' my $temp_file = 'C:\foo.txt'; my $result = Bio::SearchIO->new(-format => $format, -file => "<$temp_file", -signif=> $signif); So I used this handy "-signif" argument I found on the blast.pm documentation (ignore hits with evalue bigger than the cutoff, which by the way it would be nice to be added to the HOWTO:SearchIO in the Bioperl wiki)... and then discovered that SearchIO would ignore "-signif" argument completely when parsing HMMER files and would give me the bad hits. Maybe the code is missing in hmmer.pm module?? Considering how both Blast and Hmmer provides an Evalue in their output (and Bioperl retrieves it in both cases with "$hsp->evalue"), it was a little weird to find it not working for Hmmer. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Oct 9 18:48:33 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Oct 2008 18:48:33 -0400 Subject: [Bioperl-guts-l] [Bug 2615] Argument inconsistency in Bio::SearchIO In-Reply-To: Message-ID: <200810092248.m99MmXkC013280@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2615 jason at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Severity|normal |minor ------- Comment #1 from jason at bioperl.org 2008-10-09 18:48 EST ------- I am unclear from the limited documentation of the feature if it is intended to cutoff at the HSP or the Hit stage. If it is at the Hit stage you can also just add this to your code when you loop: next if $hit->significance > $cutoff; Note that it is only documented in the specific parser implementation (blast.pm) so the signif,bits, etc cutoff implementation so there is no explicit assumption that other parsers support specific arguments not documented in the superclass (Bio::SearchIO). It seems like if it was implemented in the eventhandler it would be easier to apply this sort of cutoff -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Thu Oct 9 19:18:43 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 9 Oct 2008 19:18:43 -0400 Subject: [Bioperl-guts-l] [Bug 2615] Argument inconsistency in Bio::SearchIO In-Reply-To: Message-ID: <200810092318.m99NIhUt014871@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2615 ------- Comment #2 from fossandon at vtr.net 2008-10-09 19:18 EST ------- (In reply to comment #1) Well, I made a workaround at the HPS stage for my script: my $evalue = $hsp->evalue; # When using BLASTALL executable there is an extra comma left, # RPSBLAST don't have that problem $evalue =~ s/,//g; next if ($evalue > $signif); For what I can see, the "-signif" argument sets the "max_significance" value: ######## (Extract) =head2 max_significance Usage : $obj->max_significance(); Purpose : Set/Get the P or Expect value used as significance screening cutoff. This is the value of the -signif parameter supplied to new(). Hits with P or E-value above this are skipped. Returns : Scientific notation number with this format: 1.0e-05. Argument : Scientific notation number or float (when setting) Comments : Screening of significant hits uses the data provided on the : description line. For NCBI BLAST1 and WU-BLAST, this data : is P-value. for NCBI BLAST2 it is an Expect value. =cut sub max_significance { shift->{'_handler_cache'}->max_significance(@_) } =head2 signif Synonym for L =cut sub signif { shift->max_significance(@_) } ######### That "_handler_cache" is related to the "eventhandler"?? Maybe it can be worked from there?? Hmmm, although it may go elsewhere, this "max_significance" value seems the the same than the "inclusion_threshold" that appears just above it, that uses the eventHandler "shift->_eventHandler->inclusion_threshold(@_);"... Would that help?? Or maybe I'm mixing apples and oranges?? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Oct 10 11:51:33 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Oct 2008 11:51:33 -0400 Subject: [Bioperl-guts-l] [Bug 2519] Bio::Assembly::IO::ace allow for non "Contig\d" naming In-Reply-To: Message-ID: <200810101551.m9AFpXx9021890@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2519 mkiwala at watson.wustl.edu changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |mkiwala at watson.wustl.edu Status|RESOLVED |REOPENED Resolution|FIXED | ------- Comment #2 from mkiwala at watson.wustl.edu 2008-10-10 11:51 EST ------- The patch for this bug introduced another bug. The portion of the regex which matches the contig name cannot capture a multi-digit contig number, eg. Contig12. I'm not sure what the policy is here, but before I opened a new bug, I thought I would try having this fixed under either this bug or 2459. A simple test case is to run t/Assembly.t, then change every occurance of "Contig1" to "Contig12" in t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2. Run Assembly.t again -- it dies trying to call get_contig_by_id('Contig12')->add_feature() because the get fails. The fix is to use non-greedy matching on the portion of the regex that matches the word portion of the contig name. Instead of this: ^CO\s[\w\-\_]+(\d+)\s This: ^CO\s[\w\-\_]+?(\d+)\s Run the test again, and the add_feature() will no longer fail. Please let me know if I should open a new bug, then I'll attach my diffs. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Oct 10 14:01:39 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Oct 2008 14:01:39 -0400 Subject: [Bioperl-guts-l] [Bug 2519] Bio::Assembly::IO::ace allow for non "Contig\d" naming In-Reply-To: Message-ID: <200810101801.m9AI1daM001121@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2519 ------- Comment #3 from mkiwala at watson.wustl.edu 2008-10-10 14:01 EST ------- Created an attachment (id=1004) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1004&action=view) modified test data for Assembly.t to expose problem with parsing contig names -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Oct 10 14:02:27 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Oct 2008 14:02:27 -0400 Subject: [Bioperl-guts-l] [Bug 2519] Bio::Assembly::IO::ace allow for non "Contig\d" naming In-Reply-To: Message-ID: <200810101802.m9AI2RVQ001196@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2519 ------- Comment #4 from mkiwala at watson.wustl.edu 2008-10-10 14:02 EST ------- Created an attachment (id=1005) --> (http://bugzilla.open-bio.org/attachment.cgi?id=1005&action=view) patch to Bio::Assembly::IO::ace to fix contig name parsing -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Fri Oct 10 15:17:06 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 10 Oct 2008 15:17:06 -0400 Subject: [Bioperl-guts-l] [14927] bioperl-live/trunk: [bug 2519] Message-ID: <200810101917.m9AJH6eQ016130@dev.open-bio.org> Revision: 14927 Author: cjfields Date: 2008-10-10 15:17:05 -0400 (Fri, 10 Oct 2008) Log Message: ----------- [bug 2519] * fix bug introduced by previous patch (new patch courtesy of Michael Kiwala) Modified Paths: -------------- bioperl-live/trunk/Bio/Assembly/IO/tigr.pm bioperl-live/trunk/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2 Modified: bioperl-live/trunk/Bio/Assembly/IO/tigr.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/IO/tigr.pm 2008-10-09 09:48:26 UTC (rev 14926) +++ bioperl-live/trunk/Bio/Assembly/IO/tigr.pm 2008-10-10 19:17:05 UTC (rev 14927) @@ -1,3 +1,4 @@ +# $Id$ # # BioPerl module for Bio::Assembly::IO::tigr # Modified: bioperl-live/trunk/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2 =================================================================== --- bioperl-live/trunk/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2 2008-10-09 09:48:26 UTC (rev 14926) +++ bioperl-live/trunk/t/data/consed_project/edit_dir/test_project.fasta.screen.ace.2 2008-10-10 19:17:05 UTC (rev 14927) @@ -1,6 +1,6 @@ AS 1 2 -CO Contig1 708 2 51 U +CO Contig12 708 2 51 U atatcTCGAGGGTACTTGAGGATTCTACCACCACAACAACAAAGCCTTAT CATCCAGAACCGCAAATTATAAAAAAGAGCCAACACCACAAAACAAAATG AAAAGAAACAAACTCTTCAACCAGTGATGTTCTGAGACTTGTTGAGGATA @@ -137,13 +137,13 @@ } CT{ -Contig1 Annotation consed 40 50 060216:160521 +Contig12 Annotation consed 40 50 060216:160521 contig extra info } CT{ -Contig1 Annotation consed 20 30 060216:160521 +Contig12 Annotation consed 20 30 060216:160521 COMMENT{ contig tag comment From cjfields at dev.open-bio.org Fri Oct 10 15:18:42 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 10 Oct 2008 15:18:42 -0400 Subject: [Bioperl-guts-l] [14928] bioperl-live/trunk/Bio/Assembly/IO/ace.pm: [bug 2519] Message-ID: <200810101918.m9AJIgqY016175@dev.open-bio.org> Revision: 14928 Author: cjfields Date: 2008-10-10 15:18:41 -0400 (Fri, 10 Oct 2008) Log Message: ----------- [bug 2519] * forgot ace! Modified Paths: -------------- bioperl-live/trunk/Bio/Assembly/IO/ace.pm Modified: bioperl-live/trunk/Bio/Assembly/IO/ace.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/IO/ace.pm 2008-10-10 19:17:05 UTC (rev 14927) +++ bioperl-live/trunk/Bio/Assembly/IO/ace.pm 2008-10-10 19:18:41 UTC (rev 14928) @@ -139,7 +139,7 @@ #}; # Loading contig sequence (COntig sequence field) - (/^CO\s[\w\-\_]+(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\w+)/xms) && do { # New contig found! + (/^CO\s[\w\-\_]+?(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\w+)/xms) && do { # New contig found! my $contigID = $1; $contigOBJ = Bio::Assembly::Contig->new(-source=>'phrap', -id=>$contigID); # $contigOBJ->set_nof_bases($2); # Contig length in base pairs From cjfields at dev.open-bio.org Fri Oct 10 15:19:09 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Fri, 10 Oct 2008 15:19:09 -0400 Subject: [Bioperl-guts-l] [14929] bioperl-live/trunk/Bio/Range.pm: typo Message-ID: <200810101919.m9AJJ9XF016203@dev.open-bio.org> Revision: 14929 Author: cjfields Date: 2008-10-10 15:19:09 -0400 (Fri, 10 Oct 2008) Log Message: ----------- typo Modified Paths: -------------- bioperl-live/trunk/Bio/Range.pm Modified: bioperl-live/trunk/Bio/Range.pm =================================================================== --- bioperl-live/trunk/Bio/Range.pm 2008-10-10 19:18:41 UTC (rev 14928) +++ bioperl-live/trunk/Bio/Range.pm 2008-10-10 19:19:09 UTC (rev 14929) @@ -226,7 +226,7 @@ Example : $st = $range->strand(); $range->strand(-1); Returns : the value of the strandedness (-1, 0 or 1) Args : optionally, the new strand - (-1, 0, 1) or (-, ., +). - Overrides: Bio::RangeI::Strand + Overrides: Bio::RangeI::strand =cut From bugzilla-daemon at portal.open-bio.org Fri Oct 10 15:38:48 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Oct 2008 15:38:48 -0400 Subject: [Bioperl-guts-l] [Bug 2615] Argument inconsistency in Bio::SearchIO In-Reply-To: Message-ID: <200810101938.m9AJcmFn007349@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2615 ------- Comment #3 from cjfields at bioperl.org 2008-10-10 15:38 EST ------- (In reply to comment #2) ... > That "_handler_cache" is related to the "eventhandler"?? Maybe it can be worked > from there?? Hmmm, although it may go elsewhere, this "max_significance" value > seems the the same than the "inclusion_threshold" that appears just above it, > that uses the eventHandler "shift->_eventHandler->inclusion_threshold(@_);"... > Would that help?? Or maybe I'm mixing apples and oranges?? If cutoffs were applied in the event handler, the decision to create results/hits/HSPs could be handled at the appropriate point in the event handler itself (prior to calling the appropriate factory object, which in turn creates the appropriate Bio::Search::* object). I think this could be better handled as callback filters which are passed the relevant hash-based data structure prior to the appropriate factory call (advanced use or private method). We could then set up some simple helper methods (like max_significance) to pass pre-made callbacks to the event handler. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Fri Oct 10 15:40:06 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Fri, 10 Oct 2008 15:40:06 -0400 Subject: [Bioperl-guts-l] [Bug 2581] HMMER parse error In-Reply-To: Message-ID: <200810101940.m9AJe6V2007436@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2581 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |INVALID ------- Comment #3 from cjfields at bioperl.org 2008-10-10 15:40 EST ------- No response. Marking as invalid. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From heikki at dev.open-bio.org Tue Oct 14 06:48:17 2008 From: heikki at dev.open-bio.org (Heikki Lehvaslaiho) Date: Tue, 14 Oct 2008 06:48:17 -0400 Subject: [Bioperl-guts-l] [14930] bioperl-live/trunk/Bio/Tree/Statistics.pm: fix parsimony score of ps() to work with polytomies Message-ID: <200810141048.m9EAmHXj000575@dev.open-bio.org> Revision: 14930 Author: heikki Date: 2008-10-14 06:48:16 -0400 (Tue, 14 Oct 2008) Log Message: ----------- fix parsimony score of ps() to work with polytomies Modified Paths: -------------- bioperl-live/trunk/Bio/Tree/Statistics.pm Modified: bioperl-live/trunk/Bio/Tree/Statistics.pm =================================================================== --- bioperl-live/trunk/Bio/Tree/Statistics.pm 2008-10-10 19:19:09 UTC (rev 14929) +++ bioperl-live/trunk/Bio/Tree/Statistics.pm 2008-10-14 10:48:16 UTC (rev 14930) @@ -180,7 +180,6 @@ than on trait. Trees have method add_traits() to set trait values from a file. -from a file. =head2 ps @@ -211,39 +210,39 @@ $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key") unless $node->has_tag($key); $node->set_tag_value('ps_trait', $node->get_tag_values($key) ); + $node->set_tag_value('ps_score', 0 ); return; # end of recursion } - my ($child1, $child2) = $node->each_Descendent; + foreach my $child ($node->each_Descendent) { + $self->ps($tree, $key, $child); + } - $self->ps($tree, $key, $child1); - $self->ps($tree, $key, $child2); - my %intersection; my %union; - map {$union{$_}++ } $child1->get_tag_values('ps_trait'); - my $score = $child1->get_tag_values('ps_score'); + my $score; - $score += $child2->get_tag_values('ps_score') || 0; - foreach my $trait ($child2->get_tag_values('ps_trait') ) { - $intersection{$trait}++ if $union{$trait}; - $union{$trait}++; + foreach my $child ($node->each_Descendent) { + foreach my $trait ($child->get_tag_values('ps_trait') ) { + $intersection{$trait}++ if $union{$trait}; + $union{$trait}++; + } + $score += $child->get_tag_values('ps_score'); } if (keys %intersection) { $node->set_tag_value('ps_trait', keys %intersection); $node->set_tag_value('ps_score', $score); } else { - $node->set_tag_value('ps_trait',keys %union); + $node->set_tag_value('ps_trait', keys %union); $node->set_tag_value('ps_score', $score+1); } if ($self->verbose) { print "-- node --------------------------\n"; - print "ID: ", $node->id, "\n"; + print "iID: ", $node->internal_id, " (", $node->id, ")\n"; print "Trait: ", join (', ', $node->get_tag_values('ps_trait') ), "\n"; print "length :", scalar($node->get_tag_values('ps_score')) , "\n"; - print "######################################\n"; } return scalar $node->get_tag_values('ps_score'); } From bugzilla-daemon at portal.open-bio.org Tue Oct 14 12:08:05 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 14 Oct 2008 12:08:05 -0400 Subject: [Bioperl-guts-l] [Bug 2615] Argument inconsistency in Bio::SearchIO In-Reply-To: Message-ID: <200810141608.m9EG85ax030752@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2615 ------- Comment #4 from fossandon at vtr.net 2008-10-14 12:08 EST ------- (In reply to comment #3) > If cutoffs were applied in the event handler, the decision to create > results/hits/HSPs could be handled at the appropriate point in the event > handler itself (prior to calling the appropriate factory object, which in turn > creates the appropriate Bio::Search::* object). I think this could be better > handled as callback filters which are passed the relevant hash-based data > structure prior to the appropriate factory call (advanced use or private > method). We could then set up some simple helper methods (like > max_significance) to pass pre-made callbacks to the event handler. That looks fine to me, to handle all the cutoffs from this central place (the event handler) instead of each format module file would make it easier to maintain and expand if necessary I guess. I think that in the case I wrote, the most appropriate place to put the filter would be on "$hit->significance" level (instead of the "$hsp->evalue" level that I used previously in my script)... Unfortunately I don't understand Bioperl good enough to make a patch. I would be very appreciated if you actually implement this. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From hartzell at dev.open-bio.org Tue Oct 14 23:47:14 2008 From: hartzell at dev.open-bio.org (George Hartzell) Date: Tue, 14 Oct 2008 23:47:14 -0400 Subject: [Bioperl-guts-l] [14931] bioperl-live/trunk/Bio/LocatableSeq.pm: Message-ID: <200810150347.m9F3lEAp002064@dev.open-bio.org> Revision: 14931 Author: hartzell Date: 2008-10-14 23:47:14 -0400 (Tue, 14 Oct 2008) Log Message: ----------- * touch up the documentation for column_from_residue_number and location_from_column. There's still a problem with location_from_column, the docs say it will return undef for the first column (given the example) but the reality is that it returns an IN-BETWEEN. This is consistent with how it behaves for column 11 (given the example). I'll ask around about who might be depending on the current behaviour. Modified Paths: -------------- bioperl-live/trunk/Bio/LocatableSeq.pm Modified: bioperl-live/trunk/Bio/LocatableSeq.pm =================================================================== --- bioperl-live/trunk/Bio/LocatableSeq.pm 2008-10-14 10:48:16 UTC (rev 14930) +++ bioperl-live/trunk/Bio/LocatableSeq.pm 2008-10-15 03:47:14 UTC (rev 14931) @@ -275,7 +275,7 @@ Seq1/91-97 AC..DEF.GH - column_from_residue_number(94) returns 5. + column_from_residue_number(94) returns 6. An exception is thrown if the residue number would lie outside the length of the aligment @@ -334,11 +334,11 @@ L where values can be undefined. For example, for the sequence: - Seq/91-97 .AC..DEF.G. + Seq/91-96 .AC..DEF.G. - location_from_column( 3 ) position 93 - location_from_column( 2 ) position 92^93 - location_from_column(10 ) position 97^98 + location_from_column( 3 ) position 92 + location_from_column( 4 ) position 92^93 + location_from_column( 9 ) position 95^96 location_from_column( 1 ) position undef An exact position returns a Bio::Location::Simple object From hartzell at dev.open-bio.org Wed Oct 15 00:11:00 2008 From: hartzell at dev.open-bio.org (George Hartzell) Date: Wed, 15 Oct 2008 00:11:00 -0400 Subject: [Bioperl-guts-l] [14932] bioperl-live/trunk/t/LocatableSeq.t: Message-ID: <200810150411.m9F4B014002264@dev.open-bio.org> Revision: 14932 Author: hartzell Date: 2008-10-15 00:11:00 -0400 (Wed, 15 Oct 2008) Log Message: ----------- * commit a TODO test that catches a problem with getting a column from a locatable seq when the column is before the first residue in the sequence and the sequences start != 1. Modified Paths: -------------- bioperl-live/trunk/t/LocatableSeq.t Modified: bioperl-live/trunk/t/LocatableSeq.t =================================================================== --- bioperl-live/trunk/t/LocatableSeq.t 2008-10-15 03:47:14 UTC (rev 14931) +++ bioperl-live/trunk/t/LocatableSeq.t 2008-10-15 04:11:00 UTC (rev 14932) @@ -7,7 +7,7 @@ use lib 't/lib'; use BioperlTest; - test_begin(-tests => 82); + test_begin(-tests => 83); use_ok('Bio::LocatableSeq'); use_ok('Bio::AlignIO'); @@ -39,8 +39,12 @@ is $loc = $seq->location_from_column(2), undef; +TODO: { + local $TODO = "Need to fix columns before start of seq w/ start > 1"; + $seq->start(90); + is $loc = $seq->location_from_column(2), undef; +} - $str = Bio::AlignIO->new(-file=> test_input_file('testaln.pfam')); ok defined($str); isa_ok $str,'Bio::AlignIO'; From cjfields at dev.open-bio.org Wed Oct 15 14:53:02 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 15 Oct 2008 14:53:02 -0400 Subject: [Bioperl-guts-l] [14933] bioperl-live/trunk: [bug 2563] Message-ID: <200810151853.m9FIr2ej009970@dev.open-bio.org> Revision: 14933 Author: cjfields Date: 2008-10-15 14:53:01 -0400 (Wed, 15 Oct 2008) Log Message: ----------- [bug 2563] * add TagTree convenience method 'pairs' (get a hash of key/val for simple TagTree data) * refactored ARP parsing - add annotation data, new tests Modified Paths: -------------- bioperl-live/trunk/Bio/AlignIO/arp.pm bioperl-live/trunk/Bio/Annotation/TagTree.pm bioperl-live/trunk/t/AlignIO.t Modified: bioperl-live/trunk/Bio/AlignIO/arp.pm =================================================================== --- bioperl-live/trunk/Bio/AlignIO/arp.pm 2008-10-15 04:11:00 UTC (rev 14932) +++ bioperl-live/trunk/Bio/AlignIO/arp.pm 2008-10-15 18:53:01 UTC (rev 14933) @@ -24,12 +24,41 @@ http://lgb.unige.ch/arlequin/ -Note that, at the moment, this only scans the allele sequence data -in the DATA section and inserts them into SimpleAlign objects. ARP -files that contain other data (RFLP, etc.) will not be parsed; if the -DNA data is actually SNP data, then the LocatableSeq object instantiation -will throw an error. +For the moment, this retains the allele sequence data in the DATA section and +inserts them into SimpleAlign objects. ARP files that contain other data (RFLP, +etc.) are not expected to parse properly. Also, if the DNA data is actually SNP +data, then the LocatableSeq object instantiation will throw an error. +This is now set up as a generic parser (i.e. it parses everything) and +collects as much data as possible into the SimpleAlign object. The following +in a general mapping of where data can be found: + + Tag SimpleAlign + Method + ---------------------------------------------------------------------- + Title description + SampleName id + ---------------------------------------------------------------------- + + Tag Bio::Annotation TagName Bio::Annotation + Class Parameters + ---------------------------------------------------------------------- + NE SimpleValue pfam_family_accession value + NL SimpleValue sequence_start_stop value + SS SimpleValue sec_structure_source value + BM SimpleValue build_model value + RN Reference reference * + ---------------------------------------------------------------------- + * RN is generated based on the number of Bio::Annotation::Reference objects + +In addition, the number of samples found in the alignment is retained in a +Bio::Annotation::TagTree object in the annotation collection and is accessible +via: + + ($samples) = $aln->annotation->get_Annotations('Samples'); + say $samples->display_text; + # or use other relevant TagTree methods to retrieve data + =head1 FEEDBACK =head2 Reporting Bugs @@ -55,10 +84,11 @@ package Bio::AlignIO::arp; use strict; +use base qw(Bio::AlignIO); + use Data::Dumper; +use Bio::Annotation::AnnotationFactory; -use base qw(Bio::AlignIO); - =head2 next_aln Title : next_aln @@ -76,44 +106,68 @@ sub next_aln { my $self = shift; my $aln = Bio::SimpleAlign->new(); - - my $data; - my $sdflag = 0; - my %arp; - + my ($data, $cur_block, $cur_type, $cur_data); SCAN: while (defined ($data = $self->_readline) ) { - if ($data =~ m{DataType=(\S+)}) { - $self->datatype($1); - } elsif ($data =~ m{SampleData=}) { - $sdflag = 1; - if (lc($self->datatype) ne 'dna') { - $self->warn("ARP data does not contain sequence!"); - return; + next if $data =~ m{^\s*$}xms; + if ($data =~ m{\[{1,2}(\w+)\]{1,2}}xms) { + $self->{state}->{current_block} = $1; + next SCAN; + } + elsif ($data =~ m{^\s*(\w+)=\s?(\S[^\n]*$)}xms) { + ($cur_type, $cur_data) = ($1, $2); + if ($cur_data =~ m{^\s*\{\s*$}) { + $self->throw("Curly block must be embedded in a named Block") + if !exists($self->{state}->{current_block}); + $self->{state}->{in_curly_block} = 1; + next SCAN; } - next; - } elsif ($data =~ m{SampleName="(.*)"}) { - $aln->description($1); + $cur_data =~ s{["']}{}g; + $cur_data =~ s{\s*$}{}; + # per alignment annotation data (i.e. Sample Blocks) or + # annotation data retained for each alignment? + $self->{state}->{current_block} eq 'Samples' ? + push @{$self->{state}->{SampleAnnotation}->{$cur_type}}, $cur_data : + push @{$self->{state}->{Annotation}->{$cur_type}}, $cur_data; } - DATA: - while ($sdflag) { - $data =~ s{(?:^\s+|\s+$)}{}; - my ($id, $score, $seq) = split m{\s+}, $data,3; - # what to do with the score??? - my $temp; - ($temp = $data) =~ s{[^A-Z\?]}{}gi; - my $newseq = Bio::LocatableSeq->new( - -start => 1, - -end => CORE::length($temp), - -seq => $seq, - -id => $id, - ); - $aln->add_seq($newseq); - $data = $self->_readline; - if ($data =~ /^\s*}\s*$/) { - last SCAN; + elsif ($data =~ m{^\s*\}\s*$}xms) { + $self->throw("Unmatched bracket in ARP file:\n$data") if + !exists($self->{state}->{in_curly_block}); + if ($self->{state}->{current_block} eq 'Samples') {; + my $ac = $self->_process_annotation($aln); + delete $self->{state}->{SampleAnnotation}; + } else { + # process other data at a later point } + delete $self->{state}->{blockdata}; + $self->{state}->{in_curly_block} = 0; + last SCAN; } + else { + # all other data should be in a curly block and have a block title + $self->throw("Data found outside of proper block:\n$data") if + !exists($self->{state}->{current_block}) && !$self->{state}->{in_curly_block}; + # bypass commented stuff (but we may want to process it at a later + # point, so turn back here) + next if $data =~ m{^\s*\#}xms; + if ($self->{state}->{current_block} eq 'Samples') { + chomp $data; + # we have two possible ways to deal with sample number, either + # clone the LocatableSeq (in which case we need to deal with ID + # duplication), or store as annotation data. I chose the latter + # route using a Bio::Annotation::TagTree. YMMV - cjfields 10-15-08 + my ($ls, $samples) = $self->_process_sequence($data); + my $id = $ls->id; + push @{ $self->{state}->{SampleAnnotation}->{Samples} }, [$id => $samples]; + $aln->add_seq($ls); + } else { + # add elsif's for further processing + #$self->debug('Unmatched data in block '. + # $self->{state}->{current_block}. + # ":\n$data\n"); + $self->{state}->{blockdata} .= $data; + } + } } # alignments only returned if they contain sequences return $aln if $aln->no_sequences; @@ -137,10 +191,46 @@ $self->throw_not_implemented; } -sub datatype { - my $self = shift; - return $self->{'_datatype'} = shift if @_; - return $self->{'_datatype'}; +################ PRIVATE SUBS ################ + +sub _process_sequence { + my ($self, $raw) = @_; + return unless defined $raw; + $raw =~ s{(?:^\s+|\s+$)}{}g; + my ($id, $samples, $seq) = split(' ', $raw); + my $ls = Bio::LocatableSeq->new(-seq => $seq, + -start => 1, + -id => $id); + return($ls, $samples); } +sub _process_annotation { + my ($self, $aln) = @_; + my $coll = Bio::Annotation::Collection->new(); + my $factory = Bio::Annotation::AnnotationFactory->new(-type => 'Bio::Annotation::SimpleValue'); + for my $anntype (qw(SampleAnnotation Annotation)) { + for my $key (keys %{ $self->{state}->{$anntype} }) { + if ($key eq 'Title') { + $aln->description($self->{state}->{$anntype}->{$key}[0]); + } elsif ($key eq 'Samples') { + $factory->type('Bio::Annotation::TagTree'); + $coll->add_Annotation($key, $factory->create_object( + -value => [$key => $self->{state}->{$anntype}->{$key}])); + $factory->type('Bio::Annotation::SimpleValue'); + } elsif ($key eq 'SampleName') { + $aln->id($self->{state}->{$anntype}->{$key}[0]); + } else { + $self->throw('Expecting an array reference') unless + ref $self->{state}->{$anntype}->{$key} eq 'ARRAY'; + for my $a (@{ $self->{state}->{$anntype}->{$key} }) { + $coll->add_Annotation($key, $factory->create_object( + -value => $a) ); + } + } + } + } + #$self->debug("Collection:".Dumper($coll)."\n"); + $aln->annotation($coll); +} + 1; Modified: bioperl-live/trunk/Bio/Annotation/TagTree.pm =================================================================== --- bioperl-live/trunk/Bio/Annotation/TagTree.pm 2008-10-15 04:11:00 UTC (rev 14932) +++ bioperl-live/trunk/Bio/Annotation/TagTree.pm 2008-10-15 18:53:01 UTC (rev 14933) @@ -626,6 +626,23 @@ return $self->node->hash; } +=head2 pairs + + Title : pairs + Usage : $struct->pairs; + Function: turns the tag-value tree into a hash, all data values are scalar + Returns : hash + Args : first arg = element name + all other args are added as tag-value pairs, note that duplicates + will be lost + +=cut + +sub pairs { + my ($self) = @_; + return $self->node->pairs; +} + =head2 qmatch Title : qmatch Modified: bioperl-live/trunk/t/AlignIO.t =================================================================== --- bioperl-live/trunk/t/AlignIO.t 2008-10-15 04:11:00 UTC (rev 14932) +++ bioperl-live/trunk/t/AlignIO.t 2008-10-15 18:53:01 UTC (rev 14933) @@ -7,7 +7,7 @@ use lib 't/lib'; use BioperlTest; - test_begin(-tests => 295); @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Wed Oct 15 14:53:10 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Oct 2008 14:53:10 -0400 Subject: [Bioperl-guts-l] [Bug 2563] Bio::AlignIO::arp does not parse multiple alignments In-Reply-To: Message-ID: <200810151853.m9FIrA04022582@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2563 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #1 from cjfields at bioperl.org 2008-10-15 14:53 EST ------- Now fixed in subversion. Closing out. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Oct 15 14:55:31 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Oct 2008 14:55:31 -0400 Subject: [Bioperl-guts-l] [Bug 2614] invoking "use Bio::Tools::dpAlign; " produced error In-Reply-To: Message-ID: <200810151855.m9FItVPI022825@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2614 cjfields at bioperl.org changed: What |Removed |Added ---------------------------------------------------------------------------- Severity|blocker |normal ------- Comment #2 from cjfields at bioperl.org 2008-10-15 14:55 EST ------- Changing severity level to 'normal' (unable to reproduce the bug, but even if it reproduces it does not block functionality of all of bioperl, just one module). -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Wed Oct 15 14:57:31 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 15 Oct 2008 14:57:31 -0400 Subject: [Bioperl-guts-l] [14934] bioperl-live/trunk/Bio/AlignIO/arp.pm: add -source for ARP Message-ID: <200810151857.m9FIvVaV010112@dev.open-bio.org> Revision: 14934 Author: cjfields Date: 2008-10-15 14:57:31 -0400 (Wed, 15 Oct 2008) Log Message: ----------- add -source for ARP Modified Paths: -------------- bioperl-live/trunk/Bio/AlignIO/arp.pm Modified: bioperl-live/trunk/Bio/AlignIO/arp.pm =================================================================== --- bioperl-live/trunk/Bio/AlignIO/arp.pm 2008-10-15 18:53:01 UTC (rev 14933) +++ bioperl-live/trunk/Bio/AlignIO/arp.pm 2008-10-15 18:57:31 UTC (rev 14934) @@ -105,7 +105,7 @@ sub next_aln { my $self = shift; - my $aln = Bio::SimpleAlign->new(); + my $aln = Bio::SimpleAlign->new(-source => 'arp'); my ($data, $cur_block, $cur_type, $cur_data); SCAN: while (defined ($data = $self->_readline) ) { From bugzilla-daemon at portal.open-bio.org Wed Oct 15 15:13:36 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 15 Oct 2008 15:13:36 -0400 Subject: [Bioperl-guts-l] [Bug 2614] invoking "use Bio::Tools::dpAlign; " produced error In-Reply-To: Message-ID: <200810151913.m9FJDac5024763@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2614 andreas.lehmann at fccc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |WORKSFORME ------- Comment #3 from andreas.lehmann at fccc.edu 2008-10-15 15:13 EST ------- Following Comment #1 by Chris Fields resolved the issue. I had two different versions of bioperl installed because I used a precompiled binary obtained through Ubuntu Synaptic (GUI for apt-get) for bioperl 1.5.2 and I compiled bioperl-ext 1.4.? myself to go with it. I had no error messages and the test of Ext/dpAlign was positive (while staden didn't go so well). After uninstalling all of the above, I reinstalled bioperl 1.5.2 and bioperl-ext 1.5.2 both from SVN and the bug is not reproduced. Thanks for your help!!! -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From heikki at dev.open-bio.org Fri Oct 17 01:45:12 2008 From: heikki at dev.open-bio.org (Heikki Lehvaslaiho) Date: Fri, 17 Oct 2008 01:45:12 -0400 Subject: [Bioperl-guts-l] [14935] bioperl-live/trunk/Bio/TreeIO/nexus.pm: Write out tags color and label as a comment understood by the FigTree program Message-ID: <200810170545.m9H5jC0C023463@dev.open-bio.org> Revision: 14935 Author: heikki Date: 2008-10-17 01:45:11 -0400 (Fri, 17 Oct 2008) Log Message: ----------- Write out tags color and label as a comment understood by the FigTree program Modified Paths: -------------- bioperl-live/trunk/Bio/TreeIO/nexus.pm Modified: bioperl-live/trunk/Bio/TreeIO/nexus.pm =================================================================== --- bioperl-live/trunk/Bio/TreeIO/nexus.pm 2008-10-15 18:57:31 UTC (rev 14934) +++ bioperl-live/trunk/Bio/TreeIO/nexus.pm 2008-10-17 05:45:11 UTC (rev 14935) @@ -26,6 +26,17 @@ This is a driver module for parsing PAUP Nexus tree format which basically is just a remapping of trees. +=head2 Comments + +The nexus format allows node comments that are placed inside square +brackets. Usually the comments (implemented as tags for nodes) are +used to give a name for an internal node or record the bootstap value, +but other uses are possible. + +The FigTree program by Andrew Rambaut adds various rendering +parameters inside comments and flags these comments by starting them +with '&!'. The parameters implemented here are 'label' and 'color'. + =head1 FEEDBACK =head2 Mailing Lists @@ -267,7 +278,7 @@ foreach my $n ( $node->each_Descendent() ) { push @data, _write_tree_Helper( $n, $node2num ); } - if ( @data > 1 ) { + if ( @data > 1 ) { # internal node $data[0] = "(" . $data[0]; $data[-1] .= ")"; @@ -288,16 +299,41 @@ } elsif ( defined( $b = $node->id ) ) { $b = $node2num->{$b} if ( $node2num->{$b} ); # translate node2num - $data[-1] .= sprintf( "[%s]", $b ); + $data[-1] .= sprintf( "[%s]", $b ) if defined $b; } + # FigTree comments start + my $comment_flag; + $comment_flag = 0 + if ( $node->has_tag('color') or $node->has_tag('label') ); + + $data[-1] .= '[&!' if defined $comment_flag; + + if ( $node->has_tag('color')) { + my $color = $node->get_tag_values('color'); + $data[-1] .= "color=$color"; + $comment_flag++; + } + if ( $node->has_tag('label')) { + my $label = $node->get_tag_values('label'); + $data[-1] .= ',' if $comment_flag; + $data[-1] .= 'label="'. $label. '"'; + } + $data[-1] .= ']' if defined $comment_flag; + # FigTree comments end + + } - else { + else { # leaf node if ( defined $node->id || defined $node->branch_length ) { my $id = defined $node->id ? $node->id : ''; if ( length($id) && $node2num->{$id} ) { $id = $node2num->{$id}; } + if ( $node->has_tag('color')) { + my ($color) = $node->get_tag_values('color'); + $id .= "[&!color=$color\]"; + } push @data, sprintf( "%s%s", $id, From heikki at dev.open-bio.org Sat Oct 18 02:23:07 2008 From: heikki at dev.open-bio.org (Heikki Lehvaslaiho) Date: Sat, 18 Oct 2008 02:23:07 -0400 Subject: [Bioperl-guts-l] [14936] bioperl-live/trunk/t/LocatableSeq.t: revcom on a Bio:: LocatableSeq does not change the outcome of column_from_residue_number() no matter how counterintutive it is. Message-ID: <200810180623.m9I6N7a5025630@dev.open-bio.org> Revision: 14936 Author: heikki Date: 2008-10-18 02:23:05 -0400 (Sat, 18 Oct 2008) Log Message: ----------- revcom on a Bio::LocatableSeq does not change the outcome of column_from_residue_number() no matter how counterintutive it is. The main reason is that revcom on sequence that is part of the alignment is a non-biological process. Modified Paths: -------------- bioperl-live/trunk/t/LocatableSeq.t Modified: bioperl-live/trunk/t/LocatableSeq.t =================================================================== --- bioperl-live/trunk/t/LocatableSeq.t 2008-10-17 05:45:11 UTC (rev 14935) +++ bioperl-live/trunk/t/LocatableSeq.t 2008-10-18 06:23:05 UTC (rev 14936) @@ -7,7 +7,7 @@ use lib 't/lib'; use BioperlTest; - test_begin(-tests => 83); + test_begin(-tests => 86); use_ok('Bio::LocatableSeq'); use_ok('Bio::AlignIO'); @@ -26,6 +26,7 @@ is $seq->strand, 1; is $seq->no_gaps, 1; is $seq->column_from_residue_number(4), 9; +is $seq->column_from_residue_number(3), 5; ok $loc = $seq->location_from_column(4); isa_ok $loc,'Bio::Location::Simple'; @@ -90,6 +91,8 @@ is $seq2->start, $seq->start; is $seq2->end, $seq->end; is $seq2->strand, $seq->strand * -1; +is $seq2->column_from_residue_number(4), 9; +is $seq2->column_from_residue_number(3), 5; # test column-mapping for -1 strand sequence $seq = Bio::LocatableSeq->new( From fangly at dev.open-bio.org Sun Oct 19 17:50:20 2008 From: fangly at dev.open-bio.org (Florent E Angly) Date: Sun, 19 Oct 2008 17:50:20 -0400 Subject: [Bioperl-guts-l] [14937] bioperl-live/trunk: [bug 2441] Singlets as Bio::Assembly:: Singlet objects Message-ID: <200810192150.m9JLoKVw004478@dev.open-bio.org> Revision: 14937 Author: fangly Date: 2008-10-19 17:50:19 -0400 (Sun, 19 Oct 2008) Log Message: ----------- [bug 2441] Singlets as Bio::Assembly::Singlet objects * documentation update of Bio::Assembly::ScaffoldI and Bio::Assembly::Scaffold * verification of input object type in Bio::Assembly::Scaffold * Bio::Assembly::IO::phrap modification to generate Bio::Assembly::Singlet objects Modified Paths: -------------- bioperl-live/trunk/Bio/Assembly/IO/phrap.pm bioperl-live/trunk/Bio/Assembly/Scaffold.pm bioperl-live/trunk/Bio/Assembly/ScaffoldI.pm bioperl-live/trunk/Bio/Assembly/Singlet.pm bioperl-live/trunk/t/Assembly.t Modified: bioperl-live/trunk/Bio/Assembly/IO/phrap.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/IO/phrap.pm 2008-10-18 06:23:05 UTC (rev 14936) +++ bioperl-live/trunk/Bio/Assembly/IO/phrap.pm 2008-10-19 21:50:19 UTC (rev 14937) @@ -45,11 +45,11 @@ yourself. See L and L.. -This driver also loads singlets into the assembly contigs as Bio::Seq -objects, altough without their sequence strings. It also adds a -feature for the entire sequence, thus storing the singlet length in -its end position, and adds a tag '_nof_trimmed_nonX' to the feature, -which stores the number of non-vector bases in the singlet. +This driver also loads singlets into the assembly contigs as +Bio::Assembly::Singlet objects, altough without their sequence strings. +It also adds a feature for the entire sequence, thus storing the singlet +length in its end position, and adds a tag '_nof_trimmed_nonX' to the +feature, which stores the number of non-vector bases in the singlet. =head2 Implementation @@ -142,6 +142,7 @@ use strict; use Bio::Assembly::Scaffold; +use Bio::Assembly::Singlet; use Bio::Assembly::Contig; use Bio::LocatableSeq; use Bio::Seq; @@ -160,133 +161,146 @@ =cut sub next_assembly { - my $self = shift; # Package reference + my $self = shift; # Package reference - # Resetting assembly data structure - my $Assembly = Bio::Assembly::Scaffold->new(-source=>'phrap'); + # Resetting assembly data structure + my $Assembly = Bio::Assembly::Scaffold->new(-source=>'phrap'); - # Looping over all phrap out file lines - my ($contigOBJ); - while ($_ = $self->_readline) { - chomp; + # Looping over all phrap out file lines + my ($contigOBJ); + while ($_ = $self->_readline) { + chomp; - # Loading exact dupicated reads list -# /Exact duplicate reads:/ && do { -# my @exact_dupl; -# while () { -# last if (/^\s*$/); -# /(\S+)\s+(\S+)/ && do { -# push(@exact_dupl,[$1,$2]); -# }; -# $self->{'assembly'}{'exact_dupl_reads'} = -# new Data::Table(\@exact_dupl,['included','excluded'],0); -# } -# }; + # Loading exact dupicated reads list + # /Exact duplicate reads:/ && do { + # my @exact_dupl; + # while () { + # last if (/^\s*$/); + # /(\S+)\s+(\S+)/ && do { + # push(@exact_dupl,[$1,$2]); + # }; + # $self->{'assembly'}{'exact_dupl_reads'} = + # new Data::Table(\@exact_dupl,['included','excluded'],0); + # } + # }; - # Loading singlets reads data - /^(\d+) isolated singletons/ && do { - while ($_ = $self->_readline) { - chomp; - last if (/^$/); - if (/^\s+(\S+)\s+(\d+)\s+\((\d+)\)/) { - my $seqID = $1; my $length = $2; - my $nof_trimmed_nonX = $3; - my $seq = Bio::Seq->new(-strand=>1, - -primary_id=>$seqID); - my $f = Bio::SeqFeature::Generic->new - (-start=>1, -end=>$seq->length(), - -primary=>$seq->primary_id(), - -tag=>{ '_nof_trimmed_nonX' => $nof_trimmed_nonX } - ); - $seq->add_SeqFeature($f); - $Assembly->add_singlet($seq); - } - } - }; + # Loading singlets reads data + /^(\d+) isolated singlet/ && do { # should it match 'singlets' and 'singletons'? + while ($_ = $self->_readline) { + chomp; + last if (/^$/); + if (/^\s+(\S+)\s+(\d+)\s+\((\d+)\)/) { + my ($singletID, $length, $nof_trimmed_nonX) = ($1, $2, $3); + # Create singlet object, and add it to scaffold + my $seq = Bio::LocatableSeq->new( + -start => 1, + -end => $length, + -strand => 1, + -id => $singletID, + -primary_id => $singletID, + -alphabet => 'dna'); + my $singletOBJ = Bio::Assembly::Singlet->new( -seq => $seq ); + my $feat = Bio::SeqFeature::Generic->new( + -start => 1, + -end => $length, + -primary => "_main_contig_feature:".$singletOBJ->id(), + -tag => { '_nof_trimmed_nonX' => $nof_trimmed_nonX } + ); + $singletOBJ->add_features([ $feat ],1); + $Assembly->add_singlet($singletOBJ); + } + } + }; + + # Loading contig information + /^Contig (\d+)\.\s+(\d+) reads?; (\d+) bp \(untrimmed\), (\d+) \(trimmed\)\./ && do { + my ($contigID, $nof_reads, $length, $trimmed_length) = ($1, $2, $3, $4); + $contigOBJ = Bio::Assembly::Contig->new( -id => $contigID, + -source => 'phrap' ); + my $feat = Bio::SeqFeature::Generic->new( + -start => 1, + -end => $length, + -primary => "_main_contig_feature:".$contigOBJ->id(), + -tag => { '_trimmed_length' => $trimmed_length } + ); + $contigOBJ->add_features([ $feat ],1); + $Assembly->add_contig($contigOBJ); + }; + + # Loading read information + /^(C?)\s+(-?\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+\(\s*(\d+)\)\s+(\d+\.\d*)\s+(\d+\.\d*)\s+(\d+\.\d*)/ && do { + my ($strand, $start, $end, $readID, $primary_score, $secondary_score, + $substitutions, $deletions, $insertions) = ($1, $2, $3, $4, $5, $6, $7, + $8, $9); + $strand = ($strand eq 'C' ? -1 : 1); + my $seq = Bio::LocatableSeq->new( + -start => $start, + -end => $end, + -strand => $strand, + -id => $readID, + -primary_id => $readID, + -alphabet => 'dna'); + my $unalign_coord = Bio::SeqFeature::Generic->new( + -start => $start, + -end => $end, + -primary => "_unalign_coord:$readID", + -tag => {'_primary_score'=>$primary_score, + '_secondary_score'=>$secondary_score, + '_substitutions'=>$substitutions, + '_insertions'=>,$insertions, + '_deletions'=>$deletions } + ); + $unalign_coord->attach_seq($seq); + $contigOBJ->add_seq($seq); + $contigOBJ->add_features([ $unalign_coord ]); + }; + + # Loading INTERNAL clones description + /INTERNAL\s+Contig\s+(\d+)\s+opp\s+sense/ && do { + my $contigID = $1; + my $contig = $Assembly->get_contig_by_id($contigID); + while ($_ = $self->_readline) { + my (@data,$rejected,$c1_strand,$c2_strand); + + (@data = /\s+(\*?)\s+(C?)\s+(\S+)\s+(C?)\s+(\S+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)/) && do { + if ($data[0] eq '*') { $rejected = 1 } else { $rejected = 0 } + $c1_strand = ($data[1] eq 'C' ? -1 : 1); + $c2_strand = ($data[3] eq 'C' ? -1 : 1); + (my $clone_name = $data[2]) =~ s/^(\S+)\.\w.*/$1/; + my $clone = Bio::SeqFeature::Generic->new( + -start => $data[6], + -end => $data[7], + -strand => 0, + -primary => "_internal_clone:$clone_name", + -tag => {'_1st_strand'=>,$c1_strand, + '_2nd_strand'=>,$c2_strand, + '_1st_name'=>$data[2], + '_2nd_name'=>$data[4], + '_length'=>$data[5], + '_rejected'=>$rejected} + ); + $contig->add_features([ $clone ]); + }; + + /Covered regions:/ && do { + my %coord = /(\d+)/g; my $i = 0; + foreach my $start (sort { $a <=> $b } keys %coord) { + my $cov = Bio::SeqFeature::Generic->new( + -start => $start, + -end => $coord{$start}, + -primary => '_covered_region:'.++$i + ); + # 1: attach feature to contig consensus, if any + $contig->add_features([ $cov ],1); + } + last; # exit while loop + }; # /Covered regions:/ - # Loading contig information - /^Contig (\d+)\.\s+(\d+) reads?; (\d+) bp \(untrimmed\), (\d+) \(trimmed\)\./ && do { - my $nof_reads = $2; my $length = $3; my $trimmed_length = $4; - $contigOBJ = Bio::Assembly::Contig->new(-id=>$1, -source=>'phrap'); - my $feat = Bio::SeqFeature::Generic->new(-start=>1, - -end=>$length, - -primary=>"_main_contig_feature:".$contigOBJ->id(), - -tag=>{ '_trimmed_length' => $trimmed_length } - ); - $contigOBJ->add_features([ $feat ],1); - $Assembly->add_contig($contigOBJ); - }; + } # while ($_ = $self->_readline) + }; # /INTERNAL\s+Contig\s+(\d+)\s+opp\s+sense/ + } # while ($_ = $self->_readline) - # Loading read information - /^(C?)\s+(-?\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+\(\s*(\d+)\)\s+(\d+\.\d*)\s+(\d+\.\d*)\s+(\d+\.\d*)/ && do { - my $strand = ($1 eq 'C' ? -1 : 1); - my $readID = $4; my $start = $2; my $end = $3; - my $primary_score = $5; my $secondary_score = $6; - my $substitutions = $7; my $deletions = $8; my $insertions = $9; - my $seq = Bio::LocatableSeq->new(-start=>$start, - -end=>$end, - -strand=>$strand, - -id=>$readID, - -primary_id=>$readID, - -alphabet=>'dna'); - my $unalign_coord = Bio::SeqFeature::Generic->new(-start=>$start, - -end=>$end, - -primary=>"_unalign_coord:$readID", - -tag=>{'_primary_score'=>$primary_score, - '_secondary_score'=>$secondary_score, - '_substitutions'=>$substitutions, - '_insertions'=>,$insertions, - '_deletions'=>$deletions } - ); - $unalign_coord->attach_seq($seq); - $contigOBJ->add_seq($seq); $contigOBJ->add_features([ $unalign_coord ]); - }; - - # Loading INTERNAL clones description - /INTERNAL\s+Contig\s+(\d+)\s+opp\s+sense/ && do { - my $contigID = $1; - my $contig = $Assembly->get_contig_by_id($contigID); - while ($_ = $self->_readline) { - my (@data,$rejected,$c1_strand,$c2_strand); - - (@data = /\s+(\*?)\s+(C?)\s+(\S+)\s+(C?)\s+(\S+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)/) && do { - if ($data[0] eq '*') { $rejected = 1 } else { $rejected = 0 } - $c1_strand = ($data[1] eq 'C' ? -1 : 1); @@ Diff output truncated at 10000 characters. @@ From bugzilla-daemon at portal.open-bio.org Sun Oct 19 18:06:10 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 19 Oct 2008 18:06:10 -0400 Subject: [Bioperl-guts-l] [Bug 2620] New: Bio::Assembly::IO::phrap is incomplete Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2620 Summary: Bio::Assembly::IO::phrap is incomplete Product: BioPerl Version: main-trunk Platform: PC OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Core Components AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: florent.angly at gmail.com Bio::Assembly::IO::phrap is incomplete; it does not load the actual sequence strings, nor the quality scores, just the positions and other information. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Oct 19 18:08:39 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 19 Oct 2008 18:08:39 -0400 Subject: [Bioperl-guts-l] [Bug 2441] Bio::Assembly treatment of singletons is inconsistent In-Reply-To: Message-ID: <200810192208.m9JM8db8007129@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2441 florent.angly at gmail.com changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #2 from florent.angly at gmail.com 2008-10-19 18:08 EST ------- I think I fixed the inconsistencies, added some object type checks, and fixed the phrap parser to use singlet objects. So let me close this bug, but I opened a new one because the phrap parser has some limitations: http://bugzilla.open-bio.org/show_bug.cgi?id=2620 -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Oct 19 18:15:40 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 19 Oct 2008 18:15:40 -0400 Subject: [Bioperl-guts-l] [Bug 2451] Suggestions for name of module and module improvement In-Reply-To: Message-ID: <200810192215.m9JMFeht007992@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2451 ------- Comment #4 from florent.angly at gmail.com 2008-10-19 18:15 EST ------- Well, I think this can be closed. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Oct 19 18:16:07 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 19 Oct 2008 18:16:07 -0400 Subject: [Bioperl-guts-l] [Bug 2451] Suggestions for name of module and module improvement In-Reply-To: Message-ID: <200810192216.m9JMG7Ep008043@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2451 florent.angly at gmail.com changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #5 from florent.angly at gmail.com 2008-10-19 18:16 EST ------- Well, I think this can be closed. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From fangly at dev.open-bio.org Sun Oct 19 23:56:55 2008 From: fangly at dev.open-bio.org (Florent E Angly) Date: Sun, 19 Oct 2008 23:56:55 -0400 Subject: [Bioperl-guts-l] [14938] bioperl-live/trunk: Cleanup of the Bio::Assembly::Singlet module Message-ID: <200810200356.m9K3utBC005392@dev.open-bio.org> Revision: 14938 Author: fangly Date: 2008-10-19 23:56:54 -0400 (Sun, 19 Oct 2008) Log Message: ----------- Cleanup of the Bio::Assembly::Singlet module Modified Paths: -------------- bioperl-live/trunk/Bio/Assembly/IO/ace.pm bioperl-live/trunk/Bio/Assembly/IO/phrap.pm bioperl-live/trunk/Bio/Assembly/IO/tigr.pm bioperl-live/trunk/Bio/Assembly/Scaffold.pm bioperl-live/trunk/Bio/Assembly/Singlet.pm bioperl-live/trunk/t/Assembly.t Modified: bioperl-live/trunk/Bio/Assembly/IO/ace.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/IO/ace.pm 2008-10-19 21:50:19 UTC (rev 14937) +++ bioperl-live/trunk/Bio/Assembly/IO/ace.pm 2008-10-20 03:56:54 UTC (rev 14938) @@ -401,9 +401,7 @@ } else { $adder = $seq; } - $adder->{phdfilename} = $phdfilename; - $adder->{chromatfilename} = $chromatfilename; - $singlet->seq_to_singlet($adder); + $singlet->seqref($adder); $assembly->add_singlet($singlet); } } Modified: bioperl-live/trunk/Bio/Assembly/IO/phrap.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/IO/phrap.pm 2008-10-19 21:50:19 UTC (rev 14937) +++ bioperl-live/trunk/Bio/Assembly/IO/phrap.pm 2008-10-20 03:56:54 UTC (rev 14938) @@ -192,14 +192,14 @@ if (/^\s+(\S+)\s+(\d+)\s+\((\d+)\)/) { my ($singletID, $length, $nof_trimmed_nonX) = ($1, $2, $3); # Create singlet object, and add it to scaffold - my $seq = Bio::LocatableSeq->new( + my $seq = Bio::Seq->new( -start => 1, -end => $length, -strand => 1, -id => $singletID, -primary_id => $singletID, -alphabet => 'dna'); - my $singletOBJ = Bio::Assembly::Singlet->new( -seq => $seq ); + my $singletOBJ = Bio::Assembly::Singlet->new(-seqref=>$seq); my $feat = Bio::SeqFeature::Generic->new( -start => 1, -end => $length, Modified: bioperl-live/trunk/Bio/Assembly/IO/tigr.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/IO/tigr.pm 2008-10-19 21:50:19 UTC (rev 14937) +++ bioperl-live/trunk/Bio/Assembly/IO/tigr.pm 2008-10-20 03:56:54 UTC (rev 14938) @@ -574,8 +574,7 @@ ); # Create singlet from sequence and add it to scaffold - my $singletobj = Bio::Assembly::Singlet->new; - $singletobj->seq_to_singlet($seqobj); + my $singletobj = Bio::Assembly::Singlet->new( -seqref => $seqobj ); $scaffoldobj->add_singlet($singletobj); # Add other misc contig information as features of the singlet Modified: bioperl-live/trunk/Bio/Assembly/Scaffold.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/Scaffold.pm 2008-10-19 21:50:19 UTC (rev 14937) +++ bioperl-live/trunk/Bio/Assembly/Scaffold.pm 2008-10-20 03:56:54 UTC (rev 14938) @@ -460,7 +460,7 @@ $singlet->assembly($self); # weak circular reference # Put singlet sequence in the list of sequences belonging to the scaffold - my $seqID = $singlet->seqref()->id(); + my $seqID = $singlet->id(); if (exists $self->{'_seqs'}{$seqID} && not($self->{'_seqs'}{$seqID} eq $singlet) ) { $self->warn( "Sequence $seqID already assigned to object ". @@ -506,7 +506,7 @@ # Put singlet sequences in the list of sequences belonging to the scaffold foreach my $singlet ($self->all_singlets) { - my $seqID = $singlet->seqref()->id(); + my $seqID = $singlet->id(); my $singletID = $singlet->id(); if (exists $self->{'_seqs'}{$seqID} && not($self->{'_seqs'}{$seqID} eq $singlet) ) { Modified: bioperl-live/trunk/Bio/Assembly/Singlet.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/Singlet.pm 2008-10-19 21:50:19 UTC (rev 14937) +++ bioperl-live/trunk/Bio/Assembly/Singlet.pm 2008-10-20 03:56:54 UTC (rev 14938) @@ -21,14 +21,19 @@ use Bio::Assembly::IO; # Assembly loading methods - $aio = Bio::Assembly::IO->new(-file=>"test.ace.1", - -format=>'phrap'); + $aio = Bio::Assembly::IO->new( -file => 'test.ace.1', + -format => 'phrap' ); $assembly = $aio->next_assembly; foreach $singlet ($assembly->all_singlets) { # do something } + # OR, if you want to build the singlet yourself, + + use Bio::Assembly::Singlet; + $singlet = Bio::Assembly::Singlet->new( -seqref => $seq ); + =head1 DESCRIPTION A singlet is a sequence that phrap was unable to align to any other sequences. @@ -70,128 +75,74 @@ use Bio::SeqFeature::Collection; use Bio::Seq::PrimaryQual; -use Dumpvalue(); -my $dumper = new Dumpvalue(); -$dumper->veryCompact(1); use base qw(Bio::Assembly::Contig Bio::Root::Root Bio::Align::AlignI); +=head2 new + Title : new + Usage : $singlet = $io->new( -seqref => $seq ) + Function: Create a new singlet object + Returns : A Bio::Assembly::Singlet object + Args : -seqref => Bio::Seq-compliant sequence object for the singlet + +=cut + sub new { my ($class,%ARG) = @_; my $self = $class->SUPER::new(%ARG); my $args = \%ARG; bless ($self,$class); - if ($args->{'-seq'}) { - $self->seq_to_singlet($args->{'-seq'}); + $self->{'_seqref'} = undef; + if ($args->{'-seqref'}) { + $self->seqref($args->{'-seqref'}); } return $self; } -=head2 seq_to_singlet - - Title : seq_to_singlet - Usage : my $singlet = $io->seq_to_singlet($seq) - Function: Wrap the information for a singlet as a Bio::Assembly::Singlet - Returns : A Bio::Assembly::Singlet object - Args : A Bio::Seq-compliant object - -=cut - -sub seq_to_singlet { - my ($self, $seq) = @_; - $self->seqref($seq); - $self->strand(1); - my $lseq = Bio::LocatableSeq->new( - -seq => $seq->seq(), - -start => 1, - -end => $seq->length(), - -id => $seq->display_id() ); - $lseq->{chromatfilename} = $seq->{'chromatfilename'}; - $lseq->{phdfilename} = $seq->{'phdfilename'}; - $self->set_consensus_sequence($lseq); - if (UNIVERSAL::isa($seq,"Bio::Seq::Quality")) { - $self->set_consensus_quality($seq) - } else { - # print("seq_to_singlet: the sequence (".$seq->desc().") is not a Bio::Seq::quality. it is this ($seq)\n"); - } - $self->add_seq($lseq); -} - - -=head2 id - - Title : id - Usage : my $id = $singlet->id('chad matsalla') - Function: - Returns : - Args : - -=cut - -sub id { - my $self = shift; - # print("Getting the id for this thing:\n"); - # $dumper->dumpValue($self->seqref()); - # print("This is the id: (".$self->seqref()->id().")\n"); - my $id = undef; - if (defined($self->seqref())) { - $id = $self->seqref()->id(); - } else { - $self->warn("This singlet has no ID because no Bio::Seq-compliant ". - "sequence is attached to it"); - } - return $id; -} - - =head2 seqref Title : seqref - Usage : my $seqref = $singlet->seqref($seq); - Function: Set the sequence to which this Singlet refers + Usage : $seqref = $singlet->seqref($seq); + Function: Get/set the sequence to which this singlet refers Returns : A Bio::Seq-compliant object - Args : + Args : A Bio::Seq-compliant object =cut sub seqref { my ($self,$seq) = @_; - if ($seq) { $self->{'seqref'} = $seq; } - return $self->{'seqref'}; + if (defined $seq) { $self->_seq_to_singlet($seq) }; + return $self->{'_seqref'}; } +=head2 _seq_to_singlet -=head2 chromatfilename + Title : _seq_to_singlet + Usage : $singlet->seqref($seq) + Function: Transform a sequence into a singlet + Returns : A Bio::Assembly::Singlet object + Args : A Bio::Seq-compliant object - Title : chromatfilename - Usage : my $chromatfilename = $singlet->chromatfilename($newfilename); - Function: Get the name of the chromatfile for this singlet - Returns : A string. - Args : If a string is provided, the chromatfilename will be set to that value. - =cut -sub chromatfilename { - my ($self,$name) = @_; - if ($name) { $self->{'chromatfilename'} = $name; } - return $self->{'chromatfilename'}; +sub _seq_to_singlet { + my ($self, $seq) = @_; + $self->throw("Unable to process non Bio::Seq-compliant object [",ref($seq),"]") + unless (defined $seq && $seq->isa("Bio::Seq")); + $self->{'_seqref'} = $seq; + my $lseq = Bio::LocatableSeq->new( + -seq => $seq->seq(), + -start => 1, + -end => $seq->length(), + -strand => 1, + -id => $seq->display_id() ); + $self->set_consensus_sequence($lseq); + if ($seq->isa("Bio::Seq::Quality")) { + $self->set_consensus_quality($seq) + } + $self->add_seq($lseq); + $self->id($seq->id()); + return; } -=head2 phdfilename - - Title : phdfilename - Usage : my $phdfilename = $singlet->phdfilename($newfilename); - Function: Get the name of the phdfile for this singlet - Returns : A string. - Args : If a string is provided, the phdfilename will be set to that value. - -=cut - -sub phdfilename { - my ($self,$name) = @_; - if ($name) { $self->{phdfilename} = $name; } - return $self->{'phdfilename'}; -} - - 1; Modified: bioperl-live/trunk/t/Assembly.t =================================================================== --- bioperl-live/trunk/t/Assembly.t 2008-10-19 21:50:19 UTC (rev 14937) +++ bioperl-live/trunk/t/Assembly.t 2008-10-20 03:56:54 UTC (rev 14938) @@ -7,7 +7,7 @@ use lib 't/lib'; use BioperlTest; - test_begin(-tests => 35, + test_begin(-tests => 41, -requires_module => 'DB_File'); use_ok('Bio::Assembly::IO'); @@ -24,13 +24,14 @@ isa_ok($in, 'Bio::Assembly::IO'); my $sc = $in->next_assembly; + + isa_ok($sc, 'Bio::Assembly::Scaffold'); # # Testing Scaffold # - is $sc->id, "NoName"; is $sc->id('test'), "test"; @@ -61,6 +62,7 @@ ); my $assembly = $aio->next_assembly(); + my @contigs = $assembly->all_contigs(); my $direction = $contigs[0]->strand; @@ -85,6 +87,14 @@ } is $had_tag, 2; +is $assembly->get_nof_contigs, 1; +is $assembly->get_nof_sequences_in_contigs, 2; +is($assembly->get_nof_singlets, 2, "get_nof_singlets"); +is($assembly->get_seq_ids, 2, "get_seq_ids"); +is($assembly->get_contig_ids, 1, "get_contig_ids"); @@ Diff output truncated at 10000 characters. @@ From fangly at dev.open-bio.org Mon Oct 20 19:06:10 2008 From: fangly at dev.open-bio.org (Florent E Angly) Date: Mon, 20 Oct 2008 19:06:10 -0400 Subject: [Bioperl-guts-l] [14939] bioperl-live/trunk/Bio/Assembly/Singlet.pm: Additional checks in the singlet code Message-ID: <200810202306.m9KN6At6014613@dev.open-bio.org> Revision: 14939 Author: fangly Date: 2008-10-20 19:06:09 -0400 (Mon, 20 Oct 2008) Log Message: ----------- Additional checks in the singlet code Modified Paths: -------------- bioperl-live/trunk/Bio/Assembly/Singlet.pm Modified: bioperl-live/trunk/Bio/Assembly/Singlet.pm =================================================================== --- bioperl-live/trunk/Bio/Assembly/Singlet.pm 2008-10-20 03:56:54 UTC (rev 14938) +++ bioperl-live/trunk/Bio/Assembly/Singlet.pm 2008-10-20 23:06:09 UTC (rev 14939) @@ -88,13 +88,12 @@ =cut sub new { - my ($class,%ARG) = @_; - my $self = $class->SUPER::new(%ARG); - my $args = \%ARG; - bless ($self,$class); + my ($class, @args) = @_; + my $self = $class->SUPER::new(@args); + my ($seqref) = $self->_rearrange([qw(SEQREF)], @args); $self->{'_seqref'} = undef; - if ($args->{'-seqref'}) { - $self->seqref($args->{'-seqref'}); + if (defined $seqref) { + $self->seqref($seqref); } return $self; } @@ -126,22 +125,31 @@ =cut sub _seq_to_singlet { - my ($self, $seq) = @_; + my ($self, $seq) = @_; + # Object type checking $self->throw("Unable to process non Bio::Seq-compliant object [",ref($seq),"]") unless (defined $seq && $seq->isa("Bio::Seq")); - $self->{'_seqref'} = $seq; + # Sanity check + $self->throw("Unable to have more than one seqref in a s