From bugzilla-daemon at portal.open-bio.org Sat Mar 1 09:14:11 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 1 Mar 2008 09:14:11 -0500 Subject: [Bioperl-guts-l] [Bug 2305] Use of uninitialized value in pattern match (m//) at /usr/lib/perl5/site_perl/5.8.8/Bio/SeqIO/genbank.pm line 352, line 4 In-Reply-To: Message-ID: <200803011414.m21EEBWC009870@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2305 ------- Comment #14 from mmokrejs at ribosome.natur.cuni.cz 2008-03-01 09:14 EST ------- So current cvs version of bp_seqconvert.pl does this: $ bp_seqconvert.pl --from genbank --to embl < tmp/pGL3R.gb --------------------- WARNING --------------------- MSG: Bad LOCUS name? Changing [6499] to 'unknown' and length to 6499 --------------------------------------------------- --------------------- WARNING --------------------- MSG: cannot see new qualifier in feature misc_feature: 184, 190, 1 --------------------------------------------------- ID unknown; SV 1; linear; unassigned DNA; STD; UNC; 6499 BP. XX AC unknown; XX XX XX CC ApEinfo:methylated:1 ... In principle it is ok, I would prefer it would append the double-quotes around the malformed(split) line but maybe you are right it is not so simple and generic fix. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From bugzilla-daemon at portal.open-bio.org Sat Mar 1 09:15:10 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 1 Mar 2008 09:15:10 -0500 Subject: [Bioperl-guts-l] [Bug 2305] Use of uninitialized value in pattern match (m//) at /usr/lib/perl5/site_perl/5.8.8/Bio/SeqIO/genbank.pm line 352, line 4 In-Reply-To: Message-ID: <200803011415.m21EFA0R009972@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2305 ------- Comment #15 from mmokrejs at ribosome.natur.cuni.cz 2008-03-01 09:15 EST ------- To make it clear, the continuation of the broken line is just ignored and does not appear in the resulting EMBL formatted file. So, that's correct. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Sat Mar 1 09:26:06 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 1 Mar 2008 09:26:06 -0500 Subject: [Bioperl-guts-l] [14578] bioperl-live/trunk/scripts/seq/seqconvert.PLS: add driver/ handler support (will clean these up more for 1.7) Message-ID: <200803011426.m21EQ6Tr027181@dev.open-bio.org> Revision: 14578 Author: cjfields Date: 2008-03-01 09:26:05 -0500 (Sat, 01 Mar 2008) Log Message: ----------- add driver/handler support (will clean these up more for 1.7) Modified Paths: -------------- bioperl-live/trunk/scripts/seq/seqconvert.PLS Modified: bioperl-live/trunk/scripts/seq/seqconvert.PLS =================================================================== --- bioperl-live/trunk/scripts/seq/seqconvert.PLS 2008-03-01 04:51:01 UTC (rev 14577) +++ bioperl-live/trunk/scripts/seq/seqconvert.PLS 2008-03-01 14:26:05 UTC (rev 14578) @@ -14,7 +14,7 @@ qw(gcg fasta ace raw fastq phd pir scf swiss genbank locuslink embl game qual bsml tab raw abi chado alf ctf exp ztr pln chaosxml chadoxml yaml tigr tigrxml agave chaos kegg 
interpro - lasergene strider); + lasergene strider gbdriver embldriver swissdriver); my $script=substr($0, 1+rindex($0,'/')); my $usage="Usage: From cjfields at dev.open-bio.org Sat Mar 1 11:58:51 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 1 Mar 2008 11:58:51 -0500 Subject: [Bioperl-guts-l] [14579] bioperl-live/trunk/Bio/TreeIO/nexus.pm: bug 2356 Message-ID: <200803011658.m21GwpK8027376@dev.open-bio.org> Revision: 14579 Author: cjfields Date: 2008-03-01 11:58:51 -0500 (Sat, 01 Mar 2008) Log Message: ----------- bug 2356 Modified Paths: -------------- bioperl-live/trunk/Bio/TreeIO/nexus.pm Modified: bioperl-live/trunk/Bio/TreeIO/nexus.pm =================================================================== --- bioperl-live/trunk/Bio/TreeIO/nexus.pm 2008-03-01 14:26:05 UTC (rev 14578) +++ bioperl-live/trunk/Bio/TreeIO/nexus.pm 2008-03-01 16:58:51 UTC (rev 14579) @@ -166,17 +166,14 @@ else { $self->debug("no translate in: $trees\n"); } - $trees =~ s{\n}{ }g; - while ( - $trees =~ /\s+tree\s+\*?\s*(\S+)\s*\= - \s*(?:\[\S+\])?\s*([^\;]+;)\s*/igx - ) + while ($trees =~ /\s+tree\s+\*?\s*(\S+)\s*\= + \s*(?:\[\S+\])?\s*([^\;]+;)/igx) { my ( $tree_name, $tree_str ) = ( $1, $2 ); # MrBayes does not print colons for node label # $tree_str =~ s/\)(\d*\.\d+)\)/:$1/g; - my $buf = new IO::String($tree_str); + my $buf = IO::String->new($tree_str); my $treeio = Bio::TreeIO->new( -format => 'newick', -fh => $buf From cjfields at dev.open-bio.org Sat Mar 1 12:01:31 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 1 Mar 2008 12:01:31 -0500 Subject: [Bioperl-guts-l] [14580] bioperl-live/trunk/t/TreeIO.t: bug 2356 ( check for correct number with multiple trees) Message-ID: <200803011701.m21H1VSU027406@dev.open-bio.org> Revision: 14580 Author: cjfields Date: 2008-03-01 12:01:30 -0500 (Sat, 01 Mar 2008) Log Message: ----------- bug 2356 (check for correct number with multiple trees) Modified Paths: -------------- 
bioperl-live/trunk/t/TreeIO.t Modified: bioperl-live/trunk/t/TreeIO.t =================================================================== --- bioperl-live/trunk/t/TreeIO.t 2008-03-01 16:58:51 UTC (rev 14579) +++ bioperl-live/trunk/t/TreeIO.t 2008-03-01 17:01:30 UTC (rev 14580) @@ -7,7 +7,7 @@ use lib 't/lib'; use BioperlTest; - test_begin(-tests => 73); + test_begin(-tests => 74); use_ok('Bio::TreeIO'); } @@ -231,11 +231,16 @@ -file => test_input_file('adh.mb_tree.nexus')); $tree = $treeio->next_tree; + my $ct = 1; ok($tree); is($tree->id, 'rep.1'); is($tree->get_leaf_nodes, 54); ($node) = $tree->find_node(-id => 'd.madeirensis'); is($node->branch_length,0.039223); + while ($tree = $treeio->next_tree) { + $ct++; + } + is($ct,13,'bug 2356'); } # bug #1854 From cjfields at dev.open-bio.org Sat Mar 1 12:15:39 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 1 Mar 2008 12:15:39 -0500 Subject: [Bioperl-guts-l] [14581] bioperl-live/trunk/Bio/TreeIO/pag.pm: add some flexibility for name length, defaulting to 10 Message-ID: <200803011715.m21HFdLq027458@dev.open-bio.org> Revision: 14581 Author: cjfields Date: 2008-03-01 12:15:39 -0500 (Sat, 01 Mar 2008) Log Message: ----------- add some flexibility for name length, defaulting to 10 Modified Paths: -------------- bioperl-live/trunk/Bio/TreeIO/pag.pm Modified: bioperl-live/trunk/Bio/TreeIO/pag.pm =================================================================== --- bioperl-live/trunk/Bio/TreeIO/pag.pm 2008-03-01 17:01:30 UTC (rev 14580) +++ bioperl-live/trunk/Bio/TreeIO/pag.pm 2008-03-01 17:15:39 UTC (rev 14581) @@ -65,10 +65,9 @@ package Bio::TreeIO::pag; -use vars qw($TaxonNameLen); use strict; -$TaxonNameLen = 10; +our $TaxonNameLen = 10; use base qw(Bio::TreeIO); @@ -82,6 +81,17 @@ =cut +sub _initialize { + my $self = shift; + $self->SUPER::_initialize(@_); + my ( $name_length ) = $self->_rearrange( + [ + qw(NAME_LENGTH) + ], + @_ + ); + $self->name_length( defined $name_length ? 
$name_length : $TaxonNameLen ); +} =head2 write_tree @@ -109,6 +119,7 @@ $special_node, $outgroup_ancestor, $tree_no) = (0,0,1); + my $name_len = $self->name_length; if( @args ) { ($no_outgroups, $print_header, @@ -140,10 +151,10 @@ $species_ct++; my $node_name = $node->id; - if( length($node_name)> $TaxonNameLen ) { - $self->warn( "Found a taxon name longer than $TaxonNameLen letters, \n", + if( length($node_name)> $name_len ) { + $self->warn( "Found a taxon name longer than $name_len letters, \n", "name will be abbreviated.\n"); - $node_name = substr($node_name, 0,$TaxonNameLen); + $node_name = substr($node_name, 0,$name_len); } else { # $node_name = sprintf("%-".$TaxonNameLen."s",$node_name); } @@ -225,5 +236,20 @@ $self->throw_not_implemented(); } +=head2 name_length + Title : name_length + Usage : $self->name_length(20); + Function: set mininum taxon name length + Returns : integer (length of name) + Args : integer + +=cut + +sub name_length { + my ($self, $val) = @_; + return $self->{'name_len'} = $val if $val; + return $self->{'name_len'}; +} + 1; From cjfields at dev.open-bio.org Sat Mar 1 12:28:31 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sat, 1 Mar 2008 12:28:31 -0500 Subject: [Bioperl-guts-l] [14582] bioperl-live/trunk/Bio/TreeIO/pag.pm: fix undef. Message-ID: <200803011728.m21HSVxS027489@dev.open-bio.org> Revision: 14582 Author: cjfields Date: 2008-03-01 12:28:31 -0500 (Sat, 01 Mar 2008) Log Message: ----------- fix undef. variable issue when no ancestor indicated (no root and not passed in). 
Modified Paths: -------------- bioperl-live/trunk/Bio/TreeIO/pag.pm Modified: bioperl-live/trunk/Bio/TreeIO/pag.pm =================================================================== --- bioperl-live/trunk/Bio/TreeIO/pag.pm 2008-03-01 17:15:39 UTC (rev 14581) +++ bioperl-live/trunk/Bio/TreeIO/pag.pm 2008-03-01 17:28:31 UTC (rev 14582) @@ -78,6 +78,7 @@ Function: Builds a new Bio::TreeIO::pag object Returns : an instance of Bio::TreeIO::pag Args : -file/-fh for filename or filehandles + -name_length for minimum name length (default = 10) =cut @@ -126,12 +127,14 @@ $special_node, $outgroup_ancestor, $tree_no, - $keep_outgroup) = $self->_rearrange([qw(NO_OUTGROUPS + $keep_outgroup) = $self->_rearrange([qw( + NO_OUTGROUPS PRINT_HEADER SPECIAL_NODE OUTGROUP_ANCESTOR TREE_NO - KEEP_OUTGROUP)], at args); + KEEP_OUTGROUP + NAME_LENGTH)], at args); } my $newname_base = 1; @@ -187,7 +190,7 @@ foreach my $node (@nodes) { my $i = 0; foreach my $anc (@ancestors) { - if ($node eq $anc) { $i = 1; last } + if ($anc && $node eq $anc) { $i = 1; last } } unless ($i > 0) { # root not given in PAG my $current_name = $names{$node->internal_id}; From bugzilla-daemon at portal.open-bio.org Sat Mar 1 12:30:15 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 1 Mar 2008 12:30:15 -0500 Subject: [Bioperl-guts-l] [Bug 2356] Bio::TreeIO::pag outputs (NEXUS parsing) In-Reply-To: Message-ID: <200803011730.m21HUFqA022070@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2356 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #5 from cjfields at uiuc.edu 2008-03-01 12:30 EST ------- FIxed in svn. You'll need to update TreeIO::nexus and TreeIO::pag. 
-- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sat Mar 1 12:33:39 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sat, 1 Mar 2008 12:33:39 -0500 Subject: [Bioperl-guts-l] [Bug 2373] Handling of multiple species or hybrid species in EMBL records In-Reply-To: Message-ID: <200803011733.m21HXd5i022286@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2373 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Target Milestone|1.7 release |1.6 release ------- Comment #3 from cjfields at uiuc.edu 2008-03-01 12:33 EST ------- Should be easy to implement as long as everything is caught by the parser but I'm unsure how bioperl-db would handle this. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Mar 2 13:10:05 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 13:10:05 -0500 Subject: [Bioperl-guts-l] [Bug 2389] load_seqdatabase.pl crashes on varchar(n) In-Reply-To: Message-ID: <200803021810.m22IA5TW024686@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2389 hlapp at gmx.net changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |ASSIGNED ------- Comment #2 from hlapp at gmx.net 2008-03-02 13:10 EST ------- I don't think it's a good idea to just replace all varchar() types with type text. 
First of all, having reasonable constraints is a Good Thing(tm) in my book as the majority of times I found them violated it revealed a parsing error, rather than the constraints not fitting the data. Second, this won't solve the problem for the other RDBMS versions for which there is a real performance penalty and other implications when having unreasonably large column widths. That said, if the constraint is indeed not compatible with current data (such as Uniprot) we have a problem that needs to be fixed. So, what I would like to find out is 1) is this in reality a parsing error, or is there indeed a value for a column that in BioSQL is constrained to 40 chars, and 2) if so, which column in which table is the problem. Erik - would you mind sending me the full error stack if you still have it? Usually load_seqdatabase.pl will also print an extra warning message saying what it couldn't store. That message would be great too. If you don't have either anymore, do you remember vaguely what those messages said? Alternatively, do you have the offending uniprot entry (or its accession)? I suspect that it's actually the constraint on dbxref.accession. Does that ring a bell? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From bugzilla-daemon at portal.open-bio.org Sun Mar 2 13:10:51 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 13:10:51 -0500 Subject: [Bioperl-guts-l] [Bug 2389] load_seqdatabase.pl crashes on varchar(n) In-Reply-To: Message-ID: <200803021810.m22IApNj024755@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2389 hlapp at gmx.net changed: What |Removed |Added ---------------------------------------------------------------------------- AssignedTo|bioperl-guts-l at bioperl.org |hlapp at gmx.net Status|ASSIGNED |NEW -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Mar 2 17:37:52 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 17:37:52 -0500 Subject: [Bioperl-guts-l] [Bug 2373] Handling of multiple species or hybrid species in EMBL records In-Reply-To: Message-ID: <200803022237.m22MbqDB006623@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2373 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Target Milestone|1.6 release |1.7 release ------- Comment #4 from cjfields at uiuc.edu 2008-03-02 17:37 EST ------- Bumping to 1.7 as BioSQL doesn't handle these just yet (and it will give us a chance to switch completely over to Bio::Taxon). Implementing within BioPerl would not be hard, though. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From bugzilla-daemon at portal.open-bio.org Sun Mar 2 17:42:02 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 17:42:02 -0500 Subject: [Bioperl-guts-l] [Bug 2213] crashes on downloading NCBI records In-Reply-To: Message-ID: <200803022242.m22Mg2P2006832@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2213 ------- Comment #13 from cjfields at uiuc.edu 2008-03-02 17:42 EST ------- BioSQL is proceeding towards a 1.0 release; we can probably work on getting the bioperl-db bugs fixed after we clean core up. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Sun Mar 2 17:43:32 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 17:43:32 -0500 Subject: [Bioperl-guts-l] [Bug 2347] Bio::Tools::Run::Phylo::PAML::Baseml needs polishing and completion In-Reply-To: Message-ID: <200803022243.m22MhWbh006917@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2347 ------- Comment #1 from cjfields at uiuc.edu 2008-03-02 17:43 EST ------- Sendu, if you are still planning on doing this could you look at bug 2218 (which relates to PAML as well)? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From cjfields at dev.open-bio.org Sun Mar 2 18:28:41 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sun, 2 Mar 2008 18:28:41 -0500 Subject: [Bioperl-guts-l] [14583] bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm: Bug 2378 Message-ID: <200803022328.m22NSfUB001849@dev.open-bio.org> Revision: 14583 Author: cjfields Date: 2008-03-02 18:28:40 -0500 (Sun, 02 Mar 2008) Log Message: ----------- Bug 2378 Modified Paths: -------------- bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm Modified: bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm =================================================================== --- bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm 2008-03-01 17:28:31 UTC (rev 14582) +++ bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm 2008-03-02 23:28:40 UTC (rev 14583) @@ -151,10 +151,16 @@ -rank => HSP rank -links => HSP links information (WU-BLAST only) -hsp_group => HSP Group informat (WU-BLAST only) + -stranded => If the algorithm isn't known (i.e. defaults to + 'generic'), setting this will indicate start/end + coordinates are to be used to determine the strand + for 'query', 'subject', 'hit', 'both', or 'none' + (default = 'none') + =cut sub new { - my($class, at args) = @_; + my($class,%args) = @_; # don't pass anything to SUPER; complex heirarchy results in lots of work # for nothing @@ -163,7 +169,6 @@ # for speed, don't use _rearrange and just store all input data directly # with no method calls and no work done. work can be carried # out just-in-time later if desired - my %args = @args; while (my ($arg, $value) = each %args) { $arg =~ tr/a-z\055/A-Z/d; $self->{$arg} = $value; @@ -173,6 +178,7 @@ defined $self->{VERBOSE} && $self->verbose($self->{VERBOSE}); $self->{ALGORITHM} ||= 'GENERIC'; + $self->{STRANDED} ||= 'NONE'; if (! defined $self->{QUERY_LENGTH} || ! defined $self->{HIT_LENGTH}) { $self->throw("Must define hit and query length"); @@ -1314,6 +1320,11 @@ $queryfactor = ($algo =~ /^RPS-BLAST\(BLASTX\)/) ? 
1 : 0; $hitfactor = 0; } + else { + my $stranded = substr($self->{STRANDED}, 0,1); + $queryfactor = ($stranded eq 'q' || $stranded eq 'b') ? 1 : 0; + $hitfactor = ($stranded eq 'h' || $stranded eq 's' || $stranded eq 'b') ? 1 : 0; + } $self->{_query_factor} = $queryfactor; $self->{_hit_factor} = $hitfactor; } From bugzilla-daemon at portal.open-bio.org Sun Mar 2 18:29:28 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 18:29:28 -0500 Subject: [Bioperl-guts-l] [Bug 2378] Getting strand of query/hit features in Bio::Search::HSP::GenericHSP In-Reply-To: Message-ID: <200803022329.m22NTSZt009330@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2378 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #5 from cjfields at uiuc.edu 2008-03-02 18:29 EST ------- Okay, added in this functionality to GenericHSP (passed in via new): -stranded => If the algorithm isn't known (i.e. defaults to 'generic'), setting this will indicate start/end coordinates are to be used to determine the strand for 'query', 'subject', 'hit', 'both', or 'none' (default = 'none') So doing the following: my $bphsp1 = Bio::Search::HSP::GenericHSP->new( -query_start => 1, -query_end => 18, -query_name => 'a', -query_length => 10000, -hit_start => 47360, -hit_end => 47343, -hit_name => 'b', -hit_length => 10000, -identical => 100, -conserved => 100, -stranded => 'both' ); indicates that coordinates can be used to determine both strands. This now passes your test script. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From lstein at dev.open-bio.org Sun Mar 2 19:05:44 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Sun, 2 Mar 2008 19:05:44 -0500 Subject: [Bioperl-guts-l] [14584] bioperl-live/trunk: revamped featurefile significantly in order for syntax to be compatible with GBrowse , and to support loading of featurefiles into GFF3 databases Message-ID: <200803030005.m2305heA001937@dev.open-bio.org> Revision: 14584 Author: lstein Date: 2008-03-02 19:05:37 -0500 (Sun, 02 Mar 2008) Log Message: ----------- revamped featurefile significantly in order for syntax to be compatible with GBrowse, and to support loading of featurefiles into GFF3 databases Modified Paths: -------------- bioperl-live/trunk/Bio/DB/GFF/Typename.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm bioperl-live/trunk/Bio/Graphics/FeatureBase.pm bioperl-live/trunk/Bio/Graphics/FeatureFile.pm bioperl-live/trunk/Bio/Graphics/Glyph.pm bioperl-live/trunk/t/BioGraphics.t Added Paths: ----------- bioperl-live/trunk/t/data/biographics/t1/version10.png bioperl-live/trunk/t/data/biographics/t1/version3.gif bioperl-live/trunk/t/data/biographics/t2/version15.png bioperl-live/trunk/t/data/biographics/t2/version3.gif bioperl-live/trunk/t/data/biographics/t3/version3.gif bioperl-live/trunk/t/data/biographics/t3/version9.png Modified: bioperl-live/trunk/Bio/DB/GFF/Typename.pm =================================================================== --- bioperl-live/trunk/Bio/DB/GFF/Typename.pm 2008-03-02 23:28:40 UTC (rev 14583) +++ bioperl-live/trunk/Bio/DB/GFF/Typename.pm 2008-03-03 00:05:37 UTC (rev 14584) @@ -52,7 +52,7 @@ my ($method,$source) = @_; $method ||= ''; $source ||= ''; - if ($source eq '' && $method =~ /^(\w+):(\w+)$/) { + if ($source eq '' && $method =~ /^(\w+):(\w*)$/) { $method = $1; $source = $2; } Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm 
=================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm 2008-03-02 23:28:40 UTC (rev 14583) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm 2008-03-03 00:05:37 UTC (rev 14584) @@ -300,7 +300,7 @@ $load_data->{mode} = 'fff' if /\s/; # if it has any whitespace in # it, then back to fff mode - if ($line =~ /^\#\s?\#\s*(.+)/) { ## meta instruction + if ($line =~ /^\#\s?\#\s*([\#]+)/) { ## meta instruction $load_data->{mode} = 'fff'; $self->handle_meta($1); @@ -380,7 +380,7 @@ $self->store_current_feature(); my $type = shift @tokens; my $name = shift @tokens; - $ld->{CurrentGroup} = $self->_make_feature($name,$type); + $ld->{CurrentGroup} = $self->_make_indexed_feature($name,$type,'',{_ff_group=>1}); $ld->{IndexIt}{$name}++; return; } @@ -412,7 +412,7 @@ $type = '' unless defined $type; $name = '' unless defined $name; - $type ||= $ld->{CurrentGroup}->type if $ld->{CurrentGroup}; + $type ||= $ld->{CurrentGroup}->primary_tag if $ld->{CurrentGroup}; my $reference = $ld->{reference} || 'ChrUN'; foreach (@parts) { @@ -424,8 +424,17 @@ ($_->[1],$_->[2]) = ($_->[2],$_->[1]) if $_->[1] > $_->[2]; } $reference = $_->[0] if defined $_->[0]; + $_ = [@{$_}[1,2]]; # strip off the reference. 
} + # now @parts is an array of [start,end] and $reference contains the seqid + + # apply coordinate mapper + if ($self->{coordinate_mapper} && $reference) { + my @remapped = $self->{coordinate_mapper}->($reference, at parts); + ($reference, at parts) = @remapped if @remapped; + } + # either create a new feature or add a segment to it my $feature = $ld->{CurrentFeature}; if ($feature) { @@ -435,31 +444,45 @@ if ($feature->display_name ne $name || $feature->method ne $type) { $self->store_current_feature; # new feature, store old one - $feature = $ld->{CurrentFeature} = $self->_make_indexed_feature($name,$type, - $strand,$attr); + undef $feature; + } else { # create a new multipart feature + $self->_multilevel_feature($feature) unless $feature->get_SeqFeatures; + my $part = $self->_make_feature($name,$type, + $strand,$attr, + $reference,@{$parts[0]}); + $feature->add_SeqFeature($part); } - - } else { # create new feature - $feature = $ld->{CurrentFeature} = $self->_make_indexed_feature($name,$type, - $strand,$attr); } + $feature ||= $self->_make_indexed_feature($name,$type, # side effect is to set CurrentFeature + $strand,$attr, + $reference,@{$parts[0]}); # add more segments to the current feature - for my $part (@parts) { - $type ||= $feature->primary_tag; - my $sp = $self->_make_feature($name,$type,$strand,$attr, - $reference,$part->[1],$part->[2]); + if (@parts > 1) { + for my $part (@parts) { + $type ||= $feature->primary_tag; + my $sp = $self->_make_feature($name,$type,$strand,$attr, + $reference,@{$part}); $feature->add_SeqFeature($sp); + } } } +sub _multilevel_feature { # turn a single-level feature into a multilevel one + my $self = shift; + my $f = shift; + my @args = ($f->display_name,$f->type,$f->strand,{},$f->seq_id,$f->start,$f->end); + my $subpart = $self->_make_feature(@args); + $f->add_SeqFeature($subpart); +} + sub _make_indexed_feature { my $self = shift; my $f = $self->_make_feature(@_); my $name = $f->display_name; 
$self->{load_data}{CurrentFeature} = $f; - $self->{load_data}{CurrentID} = $name; + $self->{load_data}{CurrentID} = $name; $self->{load_data}{IndexIt}{$name}++; return $f; } @@ -505,6 +528,9 @@ delete $attributes->{$_} foreach qw (Phase phase); } + $self->{load_data}{IndexIt}{$name}++ + if $self->index_subfeatures && $name; + return $self->sfclass->new(@args); } @@ -623,9 +649,10 @@ my %attributes; for my $pair (@pairs) { unless ($pair =~ /=/) { - push @{$attributes{Note}},quotewords('',0,$pair); + push @{$attributes{Note}},(quotewords('',0,$pair))[0] || $pair; } else { my ($tag,$value) = quotewords('\s*=\s*',0,$pair); + $tag = 'Note' if $tag eq 'description'; push @{$attributes{$tag}},$value; } } Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-02 23:28:40 UTC (rev 14583) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-03 00:05:37 UTC (rev 14584) @@ -269,7 +269,7 @@ $self->SUPER::create_load_data; $self->{load_data}{Parent2Child} = {}; $self->{load_data}{TemporaryID} = "GFFLoad0000000"; - $self->{load_data}{IndexSubfeatures} = 1; + $self->{load_data}{IndexSubfeatures} = $self->index_subfeatures(); $self->{load_data}{mode} = 'gff'; } @@ -378,11 +378,13 @@ my $instruction = shift; if ($instruction =~ /sequence-region\s+(.+)\s+(-?\d+)\s+(-?\d+)/i) { - my $feature = $self->sfclass->new(-name => $1, - -seq_id => $1, - -start => $2, - -end => $3, - -primary_tag => 'region'); + my($ref,$start,$end,$strand) = $self->_remap($1,$2,$3,+1); + my $feature = $self->sfclass->new(-name => $ref, + -seq_id => $ref, + -start => $start, + -end => $end, + -strand => $strand, + -primary_tag => 'region'); $self->store->store($feature); return; } @@ -390,6 +392,7 @@ if ($instruction =~/index-subfeatures\s+(\S+)/i) { $self->{load_data}{IndexSubfeatures} = $1; $self->store->index_subfeatures($1); + warn "index 
subfeatures = $1"; return; } } @@ -408,8 +411,11 @@ my $gff_line = shift; my $ld = $self->{load_data}; + $gff_line =~ s/\s+/\t/g if $self->allow_whitespace; + my @columns = map {$_ eq '.' ? undef : $_ } split /\t/,$gff_line; return unless @columns >= 8; + my ($refname,$source,$method,$start,$end, $score,$strand,$phase,$attributes) = @columns; $strand = $Strandedness{$strand||0}; my ($reserved,$unreserved) = $attributes ? $self->parse_attributes($attributes) : (); @@ -455,6 +461,8 @@ } } + ($refname,$start,$end,$strand) = $self->_remap($refname,$start,$end,$strand); + my @args = (-display_name => $name, -seq_id => $refname, -start => $start, @@ -520,6 +528,22 @@ } +=item allow_whitespace + + $allow_it = $loader->allow_whitespace([$newvalue]); + +Get or set the allow_whitespace flag. If true, then GFF3 files are allowed to +be delimited with whitespace in addition to tabs. + +=cut + +sub allow_whitespace { + my $self = shift; + my $d = $self->{allow_whitespace}; + $self->{allow_whitespace} = shift if @_; + $d; +} + =item store_current_feature $loader->store_current_feature() @@ -790,6 +814,20 @@ # sub unescape { } inherited +sub _remap { + my $self = shift; + my ($ref,$start,$end,$strand) = @_; + my $mapper = $self->coordinate_mapper; + return ($ref,$start,$end,$strand) unless $mapper; + + my ($newref,$coords) = $mapper->($ref,[$start,$end]); + if ($coords->[0] > $coords->[1]) { + @{$coords} = reverse(@{$coords}); + $strand *= -1; + } + return ($newref,@{$coords},$strand); +} + 1; __END__ Modified: bioperl-live/trunk/Bio/Graphics/FeatureBase.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureBase.pm 2008-03-02 23:28:40 UTC (rev 14583) +++ bioperl-live/trunk/Bio/Graphics/FeatureBase.pm 2008-03-03 00:05:37 UTC (rev 14584) @@ -195,7 +195,12 @@ $self->{score} = shift if @_; $d; } -sub primary_tag { shift->{type} } +sub primary_tag { + my $self = shift; + my $d = $self->{type}; + $self->{type} = shift if @_; + 
$d; +} sub name { my $self = shift; my $d = $self->{name}; @@ -385,7 +390,7 @@ sub desc { my $self = shift; - my $d = $self->{desc}; + my $d = $self->notes; $self->{desc} = shift if @_; $d; } @@ -408,7 +413,7 @@ sub notes { my $self = shift; - my $notes = $self->desc; + my $notes = $self->{desc}; return $notes if defined $notes; return $self->attributes('Note'); } Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-02 23:28:40 UTC (rev 14583) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-03 00:05:37 UTC (rev 14584) @@ -120,6 +120,7 @@ use Carp 'cluck','carp','croak'; use IO::File; use Text::ParseWords 'shellwords'; +use Bio::DB::SeqFeature::Store; # default colors for unconfigured features my @COLORS = qw(cyan blue red yellow green wheat turquoise orange); @@ -203,8 +204,8 @@ $self->{coordinate_mapper} = $args{-map_coords} if exists $args{-map_coords} && ref($args{-map_coords}) eq 'CODE'; - $self->smart_features($args{-smart_features}) if exists $args{-smart_features}; - $self->{safe} = $args{-safe} if exists $args{-safe}; + $self->smart_features($args{-smart_features}) if exists $args{-smart_features}; + $self->{safe} = $args{-safe} if exists $args{-safe}; # call with # -file @@ -271,19 +272,30 @@ my $self = shift; @@ Diff output truncated at 10000 characters. 
@@ From lstein at dev.open-bio.org Sun Mar 2 19:20:11 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Sun, 2 Mar 2008 19:20:11 -0500 Subject: [Bioperl-guts-l] [14585] bioperl-live/trunk/Bio/Graphics/FeatureFile.pm: added a version method to featurefile so that GBrowse can warn when the module is not up to date Message-ID: <200803030020.m230KA3o001962@dev.open-bio.org> Revision: 14585 Author: lstein Date: 2008-03-02 19:20:07 -0500 (Sun, 02 Mar 2008) Log Message: ----------- added a version method to featurefile so that GBrowse can warn when the module is not up to date Modified Paths: -------------- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-03 00:05:37 UTC (rev 14584) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-03 00:20:07 UTC (rev 14585) @@ -132,6 +132,14 @@ =over 4 +=item $version = Bio::Graphics::FeatureFile-E<gt>version + +Return the version number -- needed for API checking by GBrowse + +=cut + +sub version { return 2 } + =item $features = Bio::Graphics::FeatureFile-E<gt>new(@args) Create a new Bio::Graphics::FeatureFile using @args to initialize the From bugzilla-daemon at portal.open-bio.org Sun Mar 2 19:27:17 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Sun, 2 Mar 2008 19:27:17 -0500 Subject: [Bioperl-guts-l] [Bug 1825] bug in parsing gene names for swissprot entry in module Bio::SeqIO:swiss In-Reply-To: Message-ID: <200803030027.m230RHw3013617@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=1825 hlapp at gmx.net changed: What |Removed |Added ---------------------------------------------------------------------------- CC|hlapp at gnf.org |hlapp at gmx.net -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You 
are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Sun Mar 2 23:15:45 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Sun, 2 Mar 2008 23:15:45 -0500 Subject: [Bioperl-guts-l] [14586] bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm: partial fix for bug 2439 Message-ID: <200803030415.m234FjlG002304@dev.open-bio.org> Revision: 14586 Author: cjfields Date: 2008-03-02 23:15:44 -0500 (Sun, 02 Mar 2008) Log Message: ----------- partial fix for bug 2439 Modified Paths: -------------- bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm Modified: bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm =================================================================== --- bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm 2008-03-03 00:20:07 UTC (rev 14585) +++ bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm 2008-03-03 04:15:44 UTC (rev 14586) @@ -239,7 +239,7 @@ 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1); my $str; - if( ! defined $num || $num <= 1 ) { + if( $num <= 1 ) { $str = &{$self->start_report}($result); } @@ -453,19 +453,23 @@ # $hspstr .= "\n"; } - - # make table of search statistics and end the web page - $str .= "

\n".$hspstr."


Search Parameters

\n"; + $str .= "
ParameterValue

\n".$hspstr; + if ($result->available_parameters || $result->available_statistics) { + # make table of search statistics and end the web page + $str .= "


Search Parameters

"; + $str .= "\n"; + + foreach my $param ( sort $result->available_parameters ) { + $str .= "\n"; - foreach my $param ( sort $result->available_parameters ) { - $str .= "\n"; - + } + $str .= "
ParameterValue
$param". $result->get_parameter($param) ."
$param". $result->get_parameter($param) ."

Search Statistics

\n"; + foreach my $stat ( sort $result->available_statistics ) { + $str .= "\n"; + } + $str .= "
StatisticValue
$stat". $result->get_statistic($stat). "
"; } - $str .= "

Search Statistics

\n"; - foreach my $stat ( sort $result->available_statistics ) { - $str .= "\n"; - } - $str .= "
StatisticValue
$stat". $result->get_statistic($stat). "

".$self->footer() . "

\n"; + $str .= $self->footer() . "

\n"; return $str; } From bugzilla-daemon at portal.open-bio.org Mon Mar 3 07:57:08 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 3 Mar 2008 07:57:08 -0500 Subject: [Bioperl-guts-l] [Bug 2347] Bio::Tools::Run::Phylo::PAML::Baseml needs polishing and completion In-Reply-To: Message-ID: <200803031257.m23Cv8Yu020626@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2347 ------- Comment #2 from bix at sendu.me.uk 2008-03-03 07:57 EST ------- Likelihood of me doing it is small. I opened this bug so that someone with more knowledge of PAML could deal with it. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From lstein at dev.open-bio.org Mon Mar 3 12:14:34 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Mon, 3 Mar 2008 12:14:34 -0500 Subject: [Bioperl-guts-l] [14587] bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm: removed dangling debug statement Message-ID: <200803031714.m23HEYKA004947@dev.open-bio.org> Revision: 14587 Author: lstein Date: 2008-03-03 12:14:33 -0500 (Mon, 03 Mar 2008) Log Message: ----------- removed dangling debug statement Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-03 04:15:44 UTC (rev 14586) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-03 17:14:33 UTC (rev 14587) @@ -392,7 +392,6 @@ if ($instruction =~/index-subfeatures\s+(\S+)/i) { $self->{load_data}{IndexSubfeatures} = $1; $self->store->index_subfeatures($1); - warn "index subfeatures = $1"; return; } } From lstein at dev.open-bio.org Mon Mar 3 12:19:30 2008 From: lstein at dev.open-bio.org (Lincoln 
Stein) Date: Mon, 3 Mar 2008 12:19:30 -0500 Subject: [Bioperl-guts-l] [14588] bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm: added missing Loader.pm file Message-ID: <200803031719.m23HJUoN004987@dev.open-bio.org> Revision: 14588 Author: lstein Date: 2008-03-03 12:19:30 -0500 (Mon, 03 Mar 2008) Log Message: ----------- added missing Loader.pm file Added Paths: ----------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm Added: bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm (rev 0) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm 2008-03-03 17:19:30 UTC (rev 14588) @@ -0,0 +1,640 @@ +package Bio::DB::SeqFeature::Store::Loader; + +# $Id$ + +=head1 NAME + +Bio::DB::SeqFeature::Store::Loader -- Loader + +=head1 SYNOPSIS + +This is the base class for Bio::DB::SeqFeature::GFF3Loader, +Bio::DB::SeqFeature::GFFLoader, and +Bio::DB::SeqFeature::FeatureFileLoader. Please see the manual pages +for these modules. + +=cut + + +# load utility - incrementally load the store based on GFF3 file +# +# two modes: +# slow mode -- features can occur in any order in the GFF3 file +# fast mode -- all features with same ID must be contiguous in GFF3 file + +use strict; +use Carp 'croak'; +use IO::File; +use Bio::DB::GFF::Util::Rearrange; +use Bio::DB::SeqFeature::Store; +use File::Spec; +use base 'Bio::Root::Root'; + +use constant DEFAULT_SEQ_CHUNK_SIZE => 2000; + +=head2 new + + Title : new + Usage : $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(@options) + Function: create a new parser + Returns : a Bio::DB::SeqFeature::Store::GFF3Loader gff3 parser and loader + Args : several - see below + Status : public + +This method creates a new GFF3 loader and establishes its connection +with a Bio::DB::SeqFeature::Store database. 
Arguments are -name=E<gt>$value +pairs as described in this table: + + Name Value + ---- ----- + + -store A writeable Bio::DB::SeqFeature::Store database handle. + + -seqfeature_class The name of the type of Bio::SeqFeatureI object to create + and store in the database (Bio::DB::SeqFeature by default) + + -sf_class A shorter alias for -seqfeature_class + + -verbose Send progress information to standard error. + + -fast If true, activate fast loading (see below) + + -chunk_size Set the storage chunk size for nucleotide/protein sequences + (default 2000 bytes) + + -tmp Indicate a temporary directory to use when loading non-normalized + features. + + -map_coords A code ref that will transform a list of ($ref,[$start1,$end1]...) + coordinates into a list of ($newref,[$newstart1,$newend1]...) + + -index_subfeatures Indicate true if subfeatures should be indexed. Default is true if + not specified. + +When you call new(), a connection to a Bio::DB::SeqFeature::Store +database should already have been established and the database +initialized (if appropriate). + +Some combinations of Bio::SeqFeatures and Bio::DB::SeqFeature::Store +databases support a fast loading mode. Currently the only reliable +implementation of fast loading is the combination of DBI::mysql with +Bio::DB::SeqFeature. The other important restriction on fast loading +is the requirement that a feature that contains subfeatures must occur +in the GFF3 file before any of its subfeatures. Otherwise the +subfeatures that occurred before the parent feature will not be +attached to the parent correctly. This restriction does not apply to +normal (slow) loading. + +If you use an unnormalized feature class, such as +Bio::SeqFeature::Generic, then the loader needs to create a temporary +database in which to cache features until all their parts and subparts +have been seen. This temporary databases uses the "bdb" adaptor. The +-tmp option specifies the directory in which that database will be +created. 
If not present, it defaults to the system default tmp +directory specified by File::Spec-Etmpdir(). + +The -chunk_size option allows you to tune the representation of +DNA/Protein sequence in the Store database. By default, sequences are +split into 2000 base/residue chunks and then reassembled as +needed. This avoids the problem of pulling a whole chromosome into +memory in order to fetch a short subsequence from somewhere in the +middle. Depending on your usage patterns, you may wish to tune this +parameter using a chunk size that is larger or smaller than the +default. + +=cut + +sub new { + my $self = shift; + my ($store,$seqfeature_class,$tmpdir,$verbose,$fast, + $seq_chunk_size,$coordinate_mapper,$index_subfeatures) = + rearrange(['STORE', + ['SF_CLASS','SEQFEATURE_CLASS'], + ['TMP','TMPDIR'], + 'VERBOSE', + 'FAST', + 'CHUNK_SIZE', + 'MAP_COORDS', + 'INDEX_SUBFEATURES', + ], at _); + + $seqfeature_class ||= $self->default_seqfeature_class; + eval "require $seqfeature_class" unless $seqfeature_class->can('new'); + $self->throw($@) if $@; + + my $normalized = $seqfeature_class->can('subfeatures_are_normalized') + && $seqfeature_class->subfeatures_are_normalized; + + my $in_table = $seqfeature_class->can('subfeatures_are_stored_in_a_table') + && $seqfeature_class->subfeatures_are_stored_in_a_table; + + if ($fast) { + my $canfast = $normalized && $in_table; + warn <tmpdir(); + + my $tmp_store = Bio::DB::SeqFeature::Store->new(-adaptor => 'berkeleydb', + -temporary=> 1, + -dsn => $tmpdir, + -cache => 1, + -write => 1) + unless $normalized; + + return bless { + store => $store, + tmp_store => $tmp_store, + seqfeature_class => $seqfeature_class, + fast => $fast, + seq_chunk_size => $seq_chunk_size || DEFAULT_SEQ_CHUNK_SIZE, + verbose => $verbose, + load_data => {}, + subfeatures_normalized => $normalized, + subfeatures_in_table => $in_table, + coordinate_mapper => $coordinate_mapper, + index_subfeatures => $index_subfeatures, + },ref($self) || $self; +} + +sub 
coordinate_mapper { + my $self = shift; + my $d = $self->{coordinate_mapper}; + $self->{coordinate_mapper} = shift if @_; + $d; +} + +sub index_subfeatures { + my $self = shift; + my $d = $self->{index_subfeatures}; + $self->{index_subfeatures} = shift if @_; + $d; +} + +=head2 load + + Title : load + Usage : $count = $loader->load(@ARGV) + Function: load the indicated files or filehandles + Returns : number of feature lines loaded + Args : list of files or filehandles + Status : public + +Once the loader is created, invoke its load() method with a list of +GFF3 or FASTA file paths or previously-opened filehandles in order to +load them into the database. Compressed files ending with .gz, .Z and +.bz2 are automatically recognized and uncompressed on the fly. Paths +beginning with http: or ftp: are treated as URLs and opened using the +LWP GET program (which must be on your path). + +FASTA files are recognized by their initial "E<gt>" character. Do not feed +the loader a file that is neither GFF3 nor FASTA; I don't know what +will happen, but it will probably not be what you expect. 
+ +=cut + +sub load { + my $self = shift; + my $start = $self->time(); + my $count = 0; + + for my $file_or_fh (@_) { + $self->msg("loading $file_or_fh...\n"); + my $fh = $self->open_fh($file_or_fh) or $self->throw("Couldn't open $file_or_fh: $!"); + $count += $self->load_fh($fh); + $self->msg(sprintf "load time: %5.2fs\n",$self->time()-$start); + } + $count; +} + +=head2 accessors + +The following read-only accessors return values passed or created during new(): + + store() the long-term Bio::DB::SeqFeature::Store object + + tmp_store() the temporary Bio::DB::SeqFeature::Store object used + during loading + + sfclass() the Bio::SeqFeatureI class + + fast() whether fast loading is active + + seq_chunk_size() the sequence chunk size + + verbose() verbose progress messages + +=cut + +sub store { shift->{store} } +sub tmp_store { shift->{tmp_store} } +sub sfclass { shift->{seqfeature_class} } +sub fast { shift->{fast} } +sub seq_chunk_size { shift->{seq_chunk_size} } +sub verbose { shift->{verbose} } + +=head2 Internal Methods + +The following methods are used internally and may be overidden by +subclasses. + +=over 4 + +=item default_seqfeature_class + + $class = $loader->default_seqfeature_class + +Return the default SeqFeatureI class (Bio::DB::SeqFeature). + +=cut + +sub default_seqfeature_class { + my $self = shift; + return 'Bio::DB::SeqFeature'; +} + +=item subfeatures_normalized + + $flag = $loader->subfeatures_normalized([$new_flag]) + +Get or set a flag that indicates that the subfeatures are +normalized. This is deduced from the SeqFeature class information. + +=cut + +sub subfeatures_normalized { + my $self = shift; + my $d = $self->{subfeatures_normalized}; + $self->{subfeatures_normalized} = shift if @_; + $d; +} + +=item subfeatures_in_table + + $flag = $loader->subfeatures_in_table([$new_flag]) + +Get or set a flag that indicates that feature/subfeature relationships +are stored in a table. 
This is deduced from the SeqFeature class and +Store information. + +=cut + +sub subfeatures_in_table { + my $self = shift; + my $d = $self->{subfeatures_in_table}; + $self->{subfeatures_in_table} = shift if @_; + $d; +} + +=item load_fh + + $count = $loader->load_fh($filehandle) + +Load the GFF3 data at the other end of the filehandle and return true +if successful. Internally, load_fh() invokes: + + start_load(); + do_load($filehandle); + finish_load(); + +=cut + +sub load_fh { + my $self = shift; + my $fh = shift; + $self->start_load(); + my $count = $self->do_load($fh); + $self->finish_load(); + $count; +} + + +=item start_load, finish_load + +These methods are called at the start and end of a filehandle load. + +=cut + +sub start_load { + my $self = shift; + $self->create_load_data; + $self->store->start_bulk_update() if $self->fast; +} + @@ Diff output truncated at 10000 characters. @@ From cjfields at dev.open-bio.org Mon Mar 3 14:37:57 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 3 Mar 2008 14:37:57 -0500 Subject: [Bioperl-guts-l] [14589] bioperl-live/trunk/Bio/SearchIO: some more for bug 2349 Message-ID: <200803031937.m23JbvfL005250@dev.open-bio.org> Revision: 14589 Author: cjfields Date: 2008-03-03 14:37:57 -0500 (Mon, 03 Mar 2008) Log Message: ----------- some more for bug 2349 Modified Paths: -------------- bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm bioperl-live/trunk/Bio/SearchIO/blast.pm Modified: bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm =================================================================== --- bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm 2008-03-03 17:19:30 UTC (rev 14588) +++ bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm 2008-03-03 19:37:57 UTC (rev 14589) @@ -299,10 +299,10 @@ ( $hit->significance ? $hit->significance : (defined $hsps[0] ? $hsps[0]->evalue : ' ')) ); + my $dline = &{$self->hit_desc_line}($self, $hit, $result); $hspstr .= "\n". 
- sprintf(">%s %s\n

Length = %s

\n\n", $url_align, - defined $hit->description ? $hit->description : '', - &_numwithcommas($hit->length)); + sprintf(">%s %s

Length = %s

\n\n", $url_align, + $dline , &_numwithcommas($hit->length)); my $ct = 0; foreach my $hsp (@hsps ) { next if( $hspfilter && ! &{$hspfilter}($hsp) ); @@ -454,20 +454,25 @@ } $str .= "

\n".$hspstr; - if ($result->available_parameters || $result->available_statistics) { + my ($pav, $sav) = ($result->available_parameters, $result->available_statistics); + if ($pav || $sav) { # make table of search statistics and end the web page $str .= "


Search Parameters

"; + if ($pav) { $str .= "\n"; - foreach my $param ( sort $result->available_parameters ) { $str .= "\n"; + } + $str .= "
ParameterValue
$param". $result->get_parameter($param) ."
"; + } - } - $str .= "

Search Statistics

\n"; + if ($sav) { + $str .= "

Search Statistics

StatisticValue
\n"; foreach my $stat ( sort $result->available_statistics ) { $str .= "\n"; } $str .= "
StatisticValue
$stat". $result->get_statistic($stat). "
"; + } } $str .= $self->footer() . "

\n"; return $str; @@ -499,7 +504,7 @@ =head2 default_hit_link_desc - Title : defaulthit_link_desc + Title : default_hit_link_desc Usage : $self->default_hit_link_desc($hit, $result) Function: Provides an HTML link(s) for the given hit to be used within the description section at the top of the BLAST report. @@ -563,6 +568,79 @@ return $self->{'_hit_link_align'} || \&default_hit_link_desc; } +=head2 hit_desc_line + + Title : hit_desc_line + Usage : $self->hit_desc_line(\&link_function); + Function: Get/Set the function which provides HTML for the description + information from a hit. This allows one to parse + the rest of the description and split up lines, add links, etc. + Returns : Function reference + Args : Function reference + See Also: L + +=cut + +sub hit_desc_line{ + my( $self, $code ) = @_; + if ($code) { + $self->{'_hit_desc_line'} = $code; + } + return $self->{'_hit_desc_line'} || \&default_hit_desc_line; +} + +=head2 default_hit_desc_line + + Title : default_hit_desc_line + Usage : $self->default_hit_desc_line($hit, $result) + Function: Parses the description line information, splits based on the + hidden \x01 between independent descriptions, checks the lines for + possible web links, and adds HTML link(s) for the given hit to be + used. + + Returns : string containing HTML markup ", L, L + +=cut + +sub default_hit_desc_line { + my($self, $hit, $result) = @_; + my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE'; + my @descs = split /\x01/, $hit->description; + #my $descline = join("
", at descs)."
"; + my $descline = ''; + #return $descline; + for my $sec (@descs) { + my $url = ''; + if ($sec =~ s/((?:gi\|(\d+)\|)? # optional GI + (\w+)\|([A-Z\d\.\_]+) # main + (\|[A-Z\d\_]+)?) # optional secondary ID//xms) { + my ($name, $gi, $db, $acc) = ($1, $2, $3, $4); + #$acc ||= ($rest) ? $rest : $gi; + $acc =~ s/^\s+(\S+)/$1/; + $acc =~ s/(\S+)\s+$/$1/; + $url = + length($self->remote_database_url($type)) > 0 ? + sprintf('
%s %s', + sprintf($self->remote_database_url($type), + $gi || $acc || $db), + $name, $sec) : $sec; + } else { + $url = $sec; + } + $descline .= "$url
\n"; + } + return $descline; +} + =head2 start_report Title : start_report Modified: bioperl-live/trunk/Bio/SearchIO/blast.pm =================================================================== --- bioperl-live/trunk/Bio/SearchIO/blast.pm 2008-03-03 17:19:30 UTC (rev 14588) +++ bioperl-live/trunk/Bio/SearchIO/blast.pm 2008-03-03 19:37:57 UTC (rev 14589) @@ -827,6 +827,7 @@ last; } else { + s/^\s(?!\s)/\x01/; #new line to concatenate desc lines with $restofline .= $_; } } From bugzilla-daemon at portal.open-bio.org Mon Mar 3 14:50:50 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 3 Mar 2008 14:50:50 -0500 Subject: [Bioperl-guts-l] [Bug 2439] multiple results HTMLResultWriter.pm and non-redundant entries in SearchIO In-Reply-To: Message-ID: <200803031950.m23Joo5v014834@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2439 ------- Comment #4 from cjfields at uiuc.edu 2008-03-03 14:50 EST ------- I have mostly implemented (1) and (3) (the last using a new handler to deal with description line information). The description line splitting needs to be tested against other SearchIO output but it works for SearchIO::blast. It would be nice to get it working with at least blastxml. Just so that you understand why some of these are problematic to implement, realize the Writer objects all push ResultI objects individually into to_string(), so by default we are assuming each string output constitutes a BLAST report with one result as opposed to a report with multiple results. The best way I can think of to get around these issues is to have a way of passing the entire input SearchIO stream to an output stream so the events are called in order (multiple times if needed). We can then have proper start/end_document() event calls for header/footer output, initiate as many to_string() events as needed and initiate/reset the writer state if necessary, something like: # $out is a SearchIO with a writer attached. 
$out->write_stream($in); # write multiple results with single header and footer. As it is now, you get a header and footer in each report. I may try adding this functionality in, which should help with resetting Writer state (an issue that's popping up with (2)). -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Mon Mar 3 15:16:39 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 3 Mar 2008 15:16:39 -0500 Subject: [Bioperl-guts-l] [14590] bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm: bug 2449 Message-ID: <200803032016.m23KGdPg005397@dev.open-bio.org> Revision: 14590 Author: cjfields Date: 2008-03-03 15:16:39 -0500 (Mon, 03 Mar 2008) Log Message: ----------- bug 2449 Modified Paths: -------------- bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm Modified: bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm =================================================================== --- bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm 2008-03-03 19:37:57 UTC (rev 14589) +++ bioperl-live/trunk/Bio/SearchIO/Writer/HTMLResultWriter.pm 2008-03-03 20:16:39 UTC (rev 14590) @@ -208,7 +208,6 @@ my ($qtype,$dbtype,$dbseqtype,$type); my $alg = $result->algorithm; - # This is actually wrong for the FASTAs I think if( $alg =~ /T(FAST|BLAST)([XY])/i ) { $qtype = $dbtype = 'translated'; @@ -282,8 +281,8 @@ # no HSPs so no link $str .= sprintf('%s %s%s%.2g'."\n", $url_desc, $descsub, - ($hit->raw_score ? $hit->raw_score : - (defined $hsps[0] ? $hsps[0]->score : ' ')), + ($hit->bits ? $hit->bits : + (defined $hsps[0] ? $hsps[0]->bits : ' ')), ( $hit->significance ? $hit->significance : (defined $hsps[0] ? $hsps[0]->evalue : ' ')) ); @@ -293,8 +292,8 @@ $str .= sprintf('%s %s%s%.2g'."\n", $url_desc, $descsub, - ($hit->raw_score ? 
$hit->raw_score : - (defined $hsps[0] ? $hsps[0]->score : ' ')), + ($hit->bits ? $hit->bits : + (defined $hsps[0] ? $hsps[0]->bits : ' ')), $acc, ( $hit->significance ? $hit->significance : (defined $hsps[0] ? $hsps[0]->evalue : ' ')) From bugzilla-daemon at portal.open-bio.org Mon Mar 3 15:17:14 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 3 Mar 2008 15:17:14 -0500 Subject: [Bioperl-guts-l] [Bug 2449] HTMLWriter out of sync In-Reply-To: Message-ID: <200803032017.m23KHEUS016751@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2449 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #1 from cjfields at uiuc.edu 2008-03-03 15:17 EST ------- Fixed in svn. I won't mess with the others (as I indicated). -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Mon Mar 3 15:58:08 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 3 Mar 2008 15:58:08 -0500 Subject: [Bioperl-guts-l] [Bug 2465] New: Bio::FeatureIO::gff typo in sequence_region() function Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2465 Summary: Bio::FeatureIO::gff typo in sequence_region() function Product: BioPerl Version: unspecified Platform: All OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Core Components AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: yostinso at modencode.org This is working with a copy of current_core_unstable.tar.bz2 downloaded from bioperl.org in early 2008. (The datestamp for the original seems to be 2007-02-14.) 
The subroutine "sequence_region" in Bio::FeatureIO::gff is an accessor that should allow both getting and setting of a sequence region feature by seq_id. There's a typo that breaks this functionality - the setter code uses: $self->{'sequence_region'}{$k} = $v; The getter code uses: return $self->{'sequence-region'}{$k}; The error is that an underscore (_) is used in the setter, and a hyphen-minus (-) is used in the getter. This makes the getter functionality completely inoperative. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Mon Mar 3 16:05:10 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Mon, 3 Mar 2008 16:05:10 -0500 Subject: [Bioperl-guts-l] [14591] bioperl-live/trunk/Bio/FeatureIO/gff.pm: bug 2465 Message-ID: <200803032105.m23L5Ar1005611@dev.open-bio.org> Revision: 14591 Author: cjfields Date: 2008-03-03 16:05:10 -0500 (Mon, 03 Mar 2008) Log Message: ----------- bug 2465 Modified Paths: -------------- bioperl-live/trunk/Bio/FeatureIO/gff.pm Modified: bioperl-live/trunk/Bio/FeatureIO/gff.pm =================================================================== --- bioperl-live/trunk/Bio/FeatureIO/gff.pm 2008-03-03 20:16:39 UTC (rev 14590) +++ bioperl-live/trunk/Bio/FeatureIO/gff.pm 2008-03-03 21:05:10 UTC (rev 14591) @@ -329,7 +329,7 @@ return $v; } elsif(defined($k)){ - return $self->{'sequence-region'}{$k}; + return $self->{'sequence_region'}{$k}; } else { return; From bugzilla-daemon at portal.open-bio.org Mon Mar 3 16:06:20 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Mon, 3 Mar 2008 16:06:20 -0500 Subject: [Bioperl-guts-l] [Bug 2465] Bio::FeatureIO::gff typo in sequence_region() function In-Reply-To: Message-ID: <200803032106.m23L6KWd019875@portal.open-bio.org> 
http://bugzilla.open-bio.org/show_bug.cgi?id=2465 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #1 from cjfields at uiuc.edu 2008-03-03 16:06 EST ------- Fixed in svn. Just to note, FeatureIO will be going through an overhaul in the next bioperl dev series. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Tue Mar 4 00:21:38 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Tue, 4 Mar 2008 00:21:38 -0500 Subject: [Bioperl-guts-l] [14592] bioperl-live/trunk/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm: bug 1707 Message-ID: <200803040521.m245LcDZ006223@dev.open-bio.org> Revision: 14592 Author: cjfields Date: 2008-03-04 00:21:38 -0500 (Tue, 04 Mar 2008) Log Message: ----------- bug 1707 Modified Paths: -------------- bioperl-live/trunk/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm Modified: bioperl-live/trunk/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm =================================================================== --- bioperl-live/trunk/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm 2008-03-03 21:05:10 UTC (rev 14591) +++ bioperl-live/trunk/Bio/DB/GFF/Adaptor/dbi/caching_handle.pm 2008-03-04 05:21:38 UTC (rev 14592) @@ -109,14 +109,10 @@ # find a non-busy dbh my $dbh = $self->dbh || $self->throw("Can't connect to database: " . 
DBI->errstr); - if (my $sth = $self->{$dbh}{$query}) { - warn "Using cached statement handler\n" if $self->debug; - return $sth; - } else { - warn "Creating new statement handler\n" if $self->debug; - $sth = $dbh->prepare($query) || $self->throw("Couldn't prepare query $query:\n ".DBI->errstr."\n"); - return $self->{$dbh}{$query} = $sth; - } + + warn "Using prepare_cache\n" if $self->debug; + my $sth = $dbh->prepare_cached($query, {}, 3) || $self->throw("Couldn't prepare query $query:\n ".DBI->errstr."\n"); + return $sth; } sub do_query { From bugzilla-daemon at portal.open-bio.org Tue Mar 4 00:22:12 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 4 Mar 2008 00:22:12 -0500 Subject: [Bioperl-guts-l] [Bug 1707] Out of memory crash from Bio::DB::GFF::Adaptor::dbi::pg.pm (Postgres) In-Reply-To: Message-ID: <200803040522.m245MCZ5018041@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=1707 ------- Comment #3 from cjfields at uiuc.edu 2008-03-04 00:22 EST ------- (In reply to comment #2) > The caching_handle module is probably superseded by DBI's prepare_cached() > method. Perhaps it should just be removed from the distribution and replaced > with calls to prepare_cached()? > > Lincoln Lincoln, I found the problem with attempting to remove caching_handle as the DB handle was not very easy (there are a number of caching_handle methods used throughout the DBI-related modules in Bio::DB::GFF). Rather than risk additional bugs, I have committed a single small change in cached_handle which just delegates to DBI's prepare_cached($query, {}, 3). It passes tests for me using MySQL. Would that work? -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From avilella at dev.open-bio.org Tue Mar 4 09:54:44 2008 From: avilella at dev.open-bio.org (Albert Vilella) Date: Tue, 4 Mar 2008 09:54:44 -0500 Subject: [Bioperl-guts-l] [14593] bioperl-run/trunk/Bio/Tools/Run/Phylo/SLR.pm: silly type -- alignment must be sorted Message-ID: <200803041454.m24Esi7J008461@dev.open-bio.org> Revision: 14593 Author: avilella Date: 2008-03-04 09:54:43 -0500 (Tue, 04 Mar 2008) Log Message: ----------- silly type -- alignment must be sorted Modified Paths: -------------- bioperl-run/trunk/Bio/Tools/Run/Phylo/SLR.pm Modified: bioperl-run/trunk/Bio/Tools/Run/Phylo/SLR.pm =================================================================== --- bioperl-run/trunk/Bio/Tools/Run/Phylo/SLR.pm 2008-03-04 05:21:38 UTC (rev 14592) +++ bioperl-run/trunk/Bio/Tools/Run/Phylo/SLR.pm 2008-03-04 14:54:43 UTC (rev 14593) @@ -501,7 +501,7 @@ '-idlinebreak' => 1, '-idlength' => $MINNAMELEN > $aln->maxdisplayname_length() ? $MINNAMELEN : $aln->maxdisplayname_length() +1); - $alnout->write_aln($aln); + $alnout->write_aln($sorted_aln); $alnout->close(); undef $alnout; close($tempseqFH); From lstein at dev.open-bio.org Tue Mar 4 12:41:39 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Tue, 4 Mar 2008 12:41:39 -0500 Subject: [Bioperl-guts-l] [14594] bioperl-live/trunk/Bio: fixes out-of-memory problems during gff3 file loading ( at the cost of reduced performance), and should fix the problem of bio:: graphics dying when a feature does not have the source_tag method Message-ID: <200803041741.m24Hfdpb010024@dev.open-bio.org> Revision: 14594 Author: lstein Date: 2008-03-04 12:41:38 -0500 (Tue, 04 Mar 2008) Log Message: ----------- fixes out-of-memory problems during gff3 file loading (at the cost of reduced performance), and should fix the problem of bio::graphics dying when a feature does not have the source_tag method Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF2Loader.pm 
bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm bioperl-live/trunk/Bio/Graphics/Glyph.pm Added Paths: ----------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF2Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF2Loader.pm 2008-03-04 14:54:43 UTC (rev 14593) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF2Loader.pm 2008-03-04 17:41:38 UTC (rev 14594) @@ -125,7 +125,7 @@ If you use an unnormalized feature class, such as Bio::SeqFeature::Generic, then the loader needs to create a temporary database in which to cache features until all their parts and subparts -have been seen. This temporary databases uses the "bdb" adaptor. The +have been seen. This temporary databases uses the "berkeleydb" adaptor. The -tmp option specifies the directory in which that database will be created. If not present, it defaults to the system default tmp directory specified by File::Spec-Etmpdir(). @@ -510,7 +510,7 @@ L, L, L, -L +L =head1 AUTHOR Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-04 14:54:43 UTC (rev 14593) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-04 17:41:38 UTC (rev 14594) @@ -53,6 +53,11 @@ You can use this to turn indexing on and off, overriding the default for a particular feature. +Note that the loader keeps a record -- in memory -- of each feature +that it has processed. If you find the loader running out of memory on +particularly large GFF3 files, please split the input file into +smaller pieces and do the load in steps. 
+ =cut @@ -65,6 +70,8 @@ use strict; use Carp 'croak'; use Bio::DB::GFF::Util::Rearrange; +use Bio::DB::SeqFeature::Store::LoadHelper; + use base 'Bio::DB::SeqFeature::Store::Loader'; @@ -135,7 +142,7 @@ If you use an unnormalized feature class, such as Bio::SeqFeature::Generic, then the loader needs to create a temporary database in which to cache features until all their parts and subparts -have been seen. This temporary databases uses the "bdb" adaptor. The +have been seen. This temporary databases uses the "berkeleydb" adaptor. The -tmp option specifies the directory in which that database will be created. If not present, it defaults to the system default tmp directory specified by File::Spec-Etmpdir(). @@ -267,10 +274,12 @@ sub create_load_data { #overridden my $self = shift; $self->SUPER::create_load_data; - $self->{load_data}{Parent2Child} = {}; $self->{load_data}{TemporaryID} = "GFFLoad0000000"; $self->{load_data}{IndexSubfeatures} = $self->index_subfeatures(); $self->{load_data}{mode} = 'gff'; + + $self->{load_data}{Helper} = + Bio::DB::SeqFeature::Store::LoadHelper->new($self->{tmpdir}); } sub finish_load { #overridden @@ -486,7 +495,7 @@ } # Current feature is the same as a feature that was loaded earlier - elsif (my $id = $self->{load_data}{Local2GlobalID}{$feature_id}) { + elsif (my $id = $self->{load_data}{Helper}->local2global($feature_id)) { $old_feat = $self->fetch($feature_id) or $self->warn(<{ID}[0]; $index_it ||= $top_level; - $ld->{IndexIt}{$feature_id}++ if $index_it; - $ld->{TopLevel}{$feature_id}++ if !$self->{fast} && $top_level; # need to track top level features +# $ld->{IndexIt}{$feature_id}++ if $index_it; +# $ld->{TopLevel}{$feature_id}++ if !$self->{fast} +# && $top_level; # need to track top level features + my $helper = $ld->{Helper}; + $helper->indexit($feature_id=>1) if $index_it; + $helper->toplevel($feature_id=>1) if !$self->{fast} + && $top_level; # need to track top level features + + # remember parentage for my $parent 
(@parent_ids) { - push @{$ld->{Parent2Child}{$parent}},$feature_id; + $helper->add_children($parent=>$feature_id); } } @@ -582,17 +598,21 @@ sub build_object_tree_in_tables { my $self = shift; - my $store = $self->store; - my $ld = $self->{load_data}; + my $store = $self->store; + my $helper = $self->{load_data}{Helper}; - while (my ($load_id,$children) = each %{$ld->{Parent2Child}}) { - die $self->throw("$load_id doesn't have a primary id") unless exists $ld->{Local2GlobalID}{$load_id}; - my $parent_id = $ld->{Local2GlobalID}{$load_id}; - my @children = map {$ld->{Local2GlobalID}{$_}} @$children; + while (my ($load_id,$children) = $helper->each_family()) { - # this updates the table that keeps track of parent/child relationships, - # but does not update the parent object -- so (start,end) had better be right!!! - $store->add_SeqFeature($parent_id,@children); + my $parent_id = $helper->local2global($load_id); + die $self->throw("$load_id doesn't have a primary id") + unless defined $parent_id; + + + my @children = map {$helper->local2global($_)} @$children; + # this updates the table that keeps track of parent/child relationships, + # but does not update the parent object -- so (start,end) had better be right!!! + $store->add_SeqFeature($parent_id,@children); + } } @@ -614,11 +634,16 @@ my $ld = $self->{load_data}; my $normalized = $self->subfeatures_normalized; - while (my ($load_id) = each %{$ld->{TopLevel}}) { + my $helper = $ld->{Helper}; + + while (my $load_id = $helper->each_toplevel) { my $feature = $self->fetch($load_id) - or $self->throw("$load_id (id=$ld->{Local2GlobalID}{$load_id}) should have a database entry, but doesn't"); + or $self->throw("$load_id (id=" + .$helper->local2global($load_id) + ." 
should have a database entry, but doesn't"); $self->attach_children($store,$ld,$load_id,$feature); - $feature->primary_id(undef) unless $ld->{IndexIt}{$load_id}; # Indexed objects are updated, not created anew + # Indexed objects are updated, not created anew + $feature->primary_id(undef) unless $helper->indexit($load_id); $store->store($feature); } @@ -638,12 +663,12 @@ my $self = shift; my ($store,$ld,$load_id,$feature) = @_; - my $children = $ld->{Parent2Child}{$load_id} or return; + my $children = $ld->{Helper}->children() or return; for my $child_id (@$children) { - my $child = $self->fetch($child_id) - or $self->throw("$child_id should have a database entry, but doesn't"); - $self->attach_children($store,$ld,$child_id,$child); # recursive call - $feature->add_SeqFeature($child); + my $child = $self->fetch($child_id) + or $self->throw("$child_id should have a database entry, but doesn't"); + $self->attach_children($store,$ld,$child_id,$child); # recursive call + $feature->add_SeqFeature($child); } } @@ -660,13 +685,14 @@ sub fetch { my $self = shift; my $load_id = shift; - my $ld = $self->{load_data}; - my $id = $ld->{Local2GlobalID}{$load_id}; + my $helper = $self->{load_data}{Helper}; + my $id = $helper->local2global($load_id); return - $self->subfeatures_normalized || $ld->{IndexIt}{$load_id} - ? $self->store->fetch($id) - : $self->tmp_store->fetch($id); + $self->subfeatures_normalized || ($helper->indexit($load_id) + ? 
$self->store->fetch($id) + : $self->tmp_store->fetch($id) + ); } =item add_segment @@ -734,9 +760,9 @@ sub parse_attributes { my $self = shift; my $att = shift; - my @pairs = map { my ($name,$value) = split /=/; + my @pairs = map { my ($name,$value) = split '='; [$self->unescape($name) => $value]; - } split /;/,$att; + } split ';',$att; my (%reserved,%unreserved); foreach (@pairs) { my $tag = $_->[0]; @@ -827,6 +853,17 @@ return ($newref,@{$coords},$strand); } +sub _indexit { # override + my $self = shift; + return $self->{load_data}{Helper}->indexit(@_); +} + +sub _local2global { # override + my $self = shift; + return $self->{load_data}{Helper}->local2global(@_); +} + + 1; __END__ @@ -845,7 +882,7 @@ L, L, L, -L +L =head1 AUTHOR Added: bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm (rev 0) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm 2008-03-04 17:41:38 UTC (rev 14594) @@ -0,0 +1,129 @@ +package Bio::DB::SeqFeature::Store::LoadHelper; + +use strict; +use DB_File; +use File::Temp 'tempdir'; +use File::Spec; +use Fcntl qw(O_CREAT O_RDWR); + +sub new { + my $class = shift; + my $tmpdir = shift; + + my @tmpargs = $tmpdir ? (DIR=>$tmpdir) : (); + my $tmppath = tempdir(@tmpargs,CLEANUP=>1); + my $self = $class->create_dbs($tmppath); + return bless $self,$class; +} + +sub create_dbs { + my $self = shift; + my $tmp = shift; + my %self; + + # Each of these hashes allow only unique keys + for my $dbname qw(IndexIt TopLevel Local2Global) { + my %h; + tie(%h,'DB_File',File::Spec->catfile($tmp,$dbname), + O_CREAT|O_RDWR,0666,$DB_BTREE); + $self{$dbname} = \%h; + } + + # The Parent2Child hash allows duplicate keys, so we + # create it with the R_DUP flag. 
+ my $btree_dups = DB_File::BTREEINFO->new(); + $btree_dups->{flags} = R_DUP; + my %h; + tie(%h,'DB_File',File::Spec->catfile($tmp,'Parent2Child'), + O_CREAT|O_RDWR,0666,$btree_dups); + $self{Parent2Child} = \%h; + + return \%self; +} + +sub indexit { + my $self = shift; + my $id = shift; + $self->{IndexIt}{$id} = shift if @_; @@ Diff output truncated at 10000 characters. @@ From lstein at dev.open-bio.org Tue Mar 4 14:44:17 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Tue, 4 Mar 2008 14:44:17 -0500 Subject: [Bioperl-guts-l] [14595] bioperl-live/trunk/Bio: fix problems parsing configuration files containing embedded "[" characters inside option values Message-ID: <200803041944.m24JiH9Q010806@dev.open-bio.org> Revision: 14595 Author: lstein Date: 2008-03-04 14:44:17 -0500 (Tue, 04 Mar 2008) Log Message: ----------- fix problems parsing configuration files containing embedded "[" characters inside option values Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm bioperl-live/trunk/Bio/Graphics/FeatureFile.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-04 17:41:38 UTC (rev 14594) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-04 19:44:17 UTC (rev 14595) @@ -360,9 +360,12 @@ if (++$load_data->{count} % 1000 == 0) { my $now = $self->time(); my $nl = -t STDOUT && !$ENV{EMACS} ? 
"\r" : "\n"; - $self->msg(sprintf("%d features loaded in %5.2fs...$nl", - $load_data->{count},$now - $load_data->{start_time})); - $load_data->{start_time} = $now; + $self->msg(sprintf("%d features loaded in %5.2fs (%5.2fs/1000 features)...%s$nl", + $load_data->{count},$now - $load_data->{start_time}, + $now - $load_data->{millenium_time}, + ' ' x 80 + )); + $load_data->{millenium_time} = $now; } } Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm 2008-03-04 17:41:38 UTC (rev 14594) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/LoadHelper.pm 2008-03-04 19:44:17 UTC (rev 14595) @@ -1,5 +1,40 @@ package Bio::DB::SeqFeature::Store::LoadHelper; +# $Id$ + +=head1 NAME + +Bio::DB::SeqFeature::Store::LoadHelper -- Internal utility for Bio::DB::SeqFeature::Store + +=head1 SYNOPSIS + +For internal use only. + +=head1 DESCRIPTION + +For internal use only + +=head1 SEE ALSO + +L, +L, +L, +L, +L, +L, +L + +=head1 AUTHOR + +Lincoln Stein Elstein at cshl.orgE. + +Copyright (c) 2006 Cold Spring Harbor Laboratory. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + use strict; use DB_File; use File::Temp 'tempdir'; @@ -21,21 +56,23 @@ my $tmp = shift; my %self; + my $hash_options = DB_File::HASHINFO->new(); + # Each of these hashes allow only unique keys for my $dbname qw(IndexIt TopLevel Local2Global) { my %h; tie(%h,'DB_File',File::Spec->catfile($tmp,$dbname), - O_CREAT|O_RDWR,0666,$DB_BTREE); + O_CREAT|O_RDWR,0666,$hash_options); $self{$dbname} = \%h; } # The Parent2Child hash allows duplicate keys, so we # create it with the R_DUP flag. 
- my $btree_dups = DB_File::BTREEINFO->new(); - $btree_dups->{flags} = R_DUP; + my $btree_options = DB_File::BTREEINFO->new(); + $btree_options->{flags} = R_DUP; my %h; tie(%h,'DB_File',File::Spec->catfile($tmp,'Parent2Child'), - O_CREAT|O_RDWR,0666,$btree_dups); + O_CREAT|O_RDWR,0666,$btree_options); $self{Parent2Child} = \%h; return \%self; Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm 2008-03-04 17:41:38 UTC (rev 14594) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/Loader.pm 2008-03-04 19:44:17 UTC (rev 14595) @@ -375,9 +375,14 @@ my $self = shift; my $fh = shift; - $self->{load_data}{start_time} = $self->time(); + $self->{load_data}{start_time} = $self->time(); + $self->{load_data}->{millenium_time} = $self->{load_data}{start_time}; $self->load_line($_) while <$fh>; - $self->msg(' 'x80,"\n"); #clear screen + $self->msg(sprintf "%d features loaded in %5.2fs%s\r", + $self->{load_data}->{count}, + $self->time()-$self->{load_data}{start_time}, + ' 'x80 + ); $self->{load_data}{count}; } Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-04 17:41:38 UTC (rev 14594) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-04 19:44:17 UTC (rev 14595) @@ -464,7 +464,9 @@ # Are we in a configuration section or a data section? 
# We start out in 'config' state, and are triggered to # reenter config state whenever we see a /^\[ pattern (config section) + my $old_state = $self->{state}; my $new_state = $self->_state_transition($line); +# warn "$old_state->$new_state: $line"; if ($new_state eq 'config') { $self->parse_config_line($line); @@ -490,7 +492,6 @@ return 'config' if $line =~ /^\s*$/; #empty line return 'config' if $line =~ m/^\s*\[([^\]]+)\]|=/; # section beginning - return 'config' if $line =~ m/\\$/; # continuation line return 'config' if $line =~ m/^\s+(.+)/ && $self->{current_tag}; # continuation section return 'config' if $line =~ /^\#/; # comment -not a meta return 'data'; @@ -512,7 +513,7 @@ return 1; } - elsif (/^\s*\[([^\]]+)\]/) { # beginning of a configuration section + elsif (/^\[([^\]]+)\]/) { # beginning of a configuration section my $label = $1; my $cc = $label =~ /^(general|default)$/i ? 'general' : $label; # normalize push @{$self->{types}},$cc unless $cc eq 'general'; From bugzilla-daemon at portal.open-bio.org Tue Mar 4 16:16:10 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 4 Mar 2008 16:16:10 -0500 Subject: [Bioperl-guts-l] [Bug 2337] BDB flatfile index should store global configuration data in BDB In-Reply-To: Message-ID: <200803042116.m24LGAAG014299@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2337 ------- Comment #1 from cjfields at uiuc.edu 2008-03-04 16:16 EST ------- I guess we need to review the specs and fix this accordingly. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From bugzilla-daemon at portal.open-bio.org Tue Mar 4 16:45:12 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 4 Mar 2008 16:45:12 -0500 Subject: [Bioperl-guts-l] [Bug 2336] bp_bioflat_index.pl and Bio::DB::Flat::BDB::fasta creates incorrect index In-Reply-To: Message-ID: <200803042145.m24LjCd0016148@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2336 bosborne at alum.mit.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |ASSIGNED -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Mar 4 16:45:50 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 4 Mar 2008 16:45:50 -0500 Subject: [Bioperl-guts-l] [Bug 2337] BDB flatfile index should store global configuration data in BDB In-Reply-To: Message-ID: <200803042145.m24LjoHv016217@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2337 bosborne at alum.mit.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |ASSIGNED -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From bugzilla-daemon at portal.open-bio.org Tue Mar 4 16:46:07 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 4 Mar 2008 16:46:07 -0500 Subject: [Bioperl-guts-l] [Bug 2338] The first 4 bytes of flatfile index is wrong (--indextype flat) In-Reply-To: Message-ID: <200803042146.m24Lk7p1016263@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2338 bosborne at alum.mit.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |ASSIGNED -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Tue Mar 4 16:46:23 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Tue, 4 Mar 2008 16:46:23 -0500 Subject: [Bioperl-guts-l] [Bug 2339] In a flat file index, the size of the final entry of a fasta file is wrong In-Reply-To: Message-ID: <200803042146.m24LkNdS016305@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2339 bosborne at alum.mit.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |ASSIGNED -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. 
From lstein at dev.open-bio.org Tue Mar 4 17:22:49 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Tue, 4 Mar 2008 17:22:49 -0500 Subject: [Bioperl-guts-l] [14596] bioperl-live/trunk/Bio/Graphics/FeatureFile.pm: config file parser should be more stable in the face of odd blank lines, funny comments, etc Message-ID: <200803042222.m24MMnuH011273@dev.open-bio.org> Revision: 14596 Author: lstein Date: 2008-03-04 17:22:48 -0500 (Tue, 04 Mar 2008) Log Message: ----------- config file parser should be more stable in the face of odd blank lines, funny comments, etc Modified Paths: -------------- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-04 19:44:17 UTC (rev 14595) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-04 22:22:48 UTC (rev 14596) @@ -466,8 +466,14 @@ # reenter config state whenever we see a /^\[ pattern (config section) my $old_state = $self->{state}; my $new_state = $self->_state_transition($line); -# warn "$old_state->$new_state: $line"; + # warn "$old_state->$new_state: $line"; + + if ($new_state ne $old_state) { + delete $self->{current_config}; + delete $self->{current_tag}; + } + if ($new_state eq 'config') { $self->parse_config_line($line); } elsif ($new_state eq 'data') { @@ -491,9 +497,12 @@ return 'data' if $line =~ /^reference\s*=/; # feature-file reference sequence directive return 'config' if $line =~ /^\s*$/; #empty line - return 'config' if $line =~ m/^\s*\[([^\]]+)\]|=/; # section beginning - return 'config' if $line =~ m/^\s+(.+)/ && $self->{current_tag}; # continuation section - return 'config' if $line =~ /^\#/; # comment -not a meta + return 'config' if $line =~ m/^\[([^\]]+)\]/; # section beginning + return 'config' if $line =~ m/^[\w\s]+=/ + && $self->{current_config}; # configuration line + return 'config' if $line =~ m/^\s+(.+)/ + 
&& $self->{current_tag}; # continuation section + return 'config' if $line =~ /^\#/; # comment -not a meta return 'data'; } return $current_state; @@ -532,7 +541,9 @@ elsif (/^$/) { # empty line - undef $self->{current_tag}; + # no longer required -- new sections are indicated by the start of a [stanza] + # line and not by termination with a blank line + # undef $self->{current_tag}; return 1; } From lstein at dev.open-bio.org Tue Mar 4 20:26:54 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Tue, 4 Mar 2008 20:26:54 -0500 Subject: [Bioperl-guts-l] [14597] bioperl-live/trunk/Bio: quenched many warnings and small bugs that crop up when processing slightly invalid config files Message-ID: <200803050126.m251QsJm014223@dev.open-bio.org> Revision: 14597 Author: lstein Date: 2008-03-04 20:26:54 -0500 (Tue, 04 Mar 2008) Log Message: ----------- quenched many warnings and small bugs that crop up when processing slightly invalid config files Modified Paths: -------------- bioperl-live/trunk/Bio/DB/GFF/Typename.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm bioperl-live/trunk/Bio/Graphics/FeatureFile.pm Modified: bioperl-live/trunk/Bio/DB/GFF/Typename.pm =================================================================== --- bioperl-live/trunk/Bio/DB/GFF/Typename.pm 2008-03-04 22:22:48 UTC (rev 14596) +++ bioperl-live/trunk/Bio/DB/GFF/Typename.pm 2008-03-05 01:26:54 UTC (rev 14597) @@ -52,7 +52,7 @@ my ($method,$source) = @_; $method ||= ''; $source ||= ''; - if ($source eq '' && $method =~ /^(\w+):(\w*)$/) { + if ($source eq '' && $method =~ /^([\w-]+):([\w-]*)$/) { $method = $1; $source = $2; } Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm 2008-03-04 22:22:48 UTC (rev 14596) +++ 
bioperl-live/trunk/Bio/DB/SeqFeature/Store/FeatureFileLoader.pm 2008-03-05 01:26:54 UTC (rev 14597) @@ -381,7 +381,7 @@ my $type = shift @tokens; my $name = shift @tokens; $ld->{CurrentGroup} = $self->_make_indexed_feature($name,$type,'',{_ff_group=>1}); - $ld->{IndexIt}{$name}++; + $self->_indexit($name => 1); return; } @@ -407,9 +407,6 @@ $self->_store_group(); } - # WARNING: DON'T FORGET THE COORDINATE MAPPER - # AND CALCULATION OF MIN AND MAX EXTENTS! - $type = '' unless defined $type; $name = '' unless defined $name; $type ||= $ld->{CurrentGroup}->primary_tag if $ld->{CurrentGroup}; @@ -483,7 +480,7 @@ my $name = $f->display_name; $self->{load_data}{CurrentFeature} = $f; $self->{load_data}{CurrentID} = $name; - $self->{load_data}{IndexIt}{$name}++; + $self->_indexit($name => 1); return $f; } @@ -528,7 +525,7 @@ delete $attributes->{$_} foreach qw (Phase phase); } - $self->{load_data}{IndexIt}{$name}++ + $self->_indexit($name=>1) if $self->index_subfeatures && $name; return $self->sfclass->new(@args); @@ -568,7 +565,7 @@ $self->store_current_feature() if $ld->{CurrentFeature}; $ld->{CurrentFeature} = $group; $ld->{CurrentID} = $group->display_name; - $ld->{IndexIt}{$ld->{CurrentID}}++; + $self->_indexit($ld->{CurrentID} => 1); undef $ld->{CurrentGroup}; $self->store_current_feature(); } Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-04 22:22:48 UTC (rev 14596) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-05 01:26:54 UTC (rev 14597) @@ -472,7 +472,7 @@ } } - ($refname,$start,$end,$strand) = $self->_remap($refname,$start,$end,$strand); + ($refname,$start,$end,$strand) = $self->_remap($refname,$start,$end,$strand) or return; my @args = (-display_name => $name, -seq_id => $refname, @@ -847,8 +847,9 @@ my ($ref,$start,$end,$strand) = @_; my $mapper = $self->coordinate_mapper; return 
($ref,$start,$end,$strand) unless $mapper; - + my ($newref,$coords) = $mapper->($ref,[$start,$end]); + return unless defined $coords->[0]; if ($coords->[0] > $coords->[1]) { @{$coords} = reverse(@{$coords}); $strand *= -1; Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-04 22:22:48 UTC (rev 14596) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-05 01:26:54 UTC (rev 14597) @@ -300,6 +300,7 @@ my @unconfigured_types = sort grep {!exists $lc_types{lc $_} && !exists $lc_types{lc $_->method} } $self->types; + my @configured_types = keys %types; my @labels_to_render = (@labels,@unconfigured_types); @@ -332,6 +333,7 @@ next if defined $selector and !$selector->($self,$label); my @features = grep {$self->_visible($_)} $self->features(\@types); + next unless @features; # suppress tracks for features that don't appear # fix up funky group hack @@ -467,7 +469,7 @@ my $old_state = $self->{state}; my $new_state = $self->_state_transition($line); - # warn "$old_state->$new_state: $line"; +# warn "$old_state=>$new_state: $line"; if ($new_state ne $old_state) { delete $self->{current_config}; From lstein at dev.open-bio.org Wed Mar 5 00:42:57 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Wed, 5 Mar 2008 00:42:57 -0500 Subject: [Bioperl-guts-l] [14598] bioperl-live/trunk/Bio: fixed an issue that prevented DAS features from rendering correctly on Bio ::Graphics::Panels (label missing) Message-ID: <200803050542.m255gvb1014553@dev.open-bio.org> Revision: 14598 Author: lstein Date: 2008-03-05 00:42:57 -0500 (Wed, 05 Mar 2008) Log Message: ----------- fixed an issue that prevented DAS features from rendering correctly on Bio::Graphics::Panels (label missing) Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Segment.pm bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 
bioperl-live/trunk/Bio/Graphics/Glyph/Factory.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Segment.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Segment.pm 2008-03-05 01:26:54 UTC (rev 14597) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Segment.pm 2008-03-05 05:42:57 UTC (rev 14598) @@ -201,6 +201,16 @@ $self->{store}->features(@args,-seqid=>$self->{seqid},-start=>$self->{start},-end=>$self->{end}); } +sub types { + my $self = shift; + my %types; + my $iterator = $self->get_seq_stream(@_); + while (my $f = $iterator->next_seq) { + $types{$f->type}++; + } + return %types; +} + =head2 get_seq_stream Title : get_seq_stream Modified: bioperl-live/trunk/Bio/Graphics/FeatureFile.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-05 01:26:54 UTC (rev 14597) +++ bioperl-live/trunk/Bio/Graphics/FeatureFile.pm 2008-03-05 05:42:57 UTC (rev 14598) @@ -457,8 +457,8 @@ my $self = shift; my $line = shift; - s/\015//g; # get rid of carriage returns left over by MS-DOS/Windows systems - s/\s+$//; # get rid of trailing whitespace + $line =~ s/\015//g; # get rid of carriage returns left over by MS-DOS/Windows systems + $line =~ s/\s+$//; # get rid of trailing whitespace return 1 if $line =~ /^\s*\#[^\#]?$/; # comment line Modified: bioperl-live/trunk/Bio/Graphics/Glyph/Factory.pm =================================================================== --- bioperl-live/trunk/Bio/Graphics/Glyph/Factory.pm 2008-03-05 01:26:54 UTC (rev 14597) +++ bioperl-live/trunk/Bio/Graphics/Glyph/Factory.pm 2008-03-05 05:42:57 UTC (rev 14598) @@ -387,6 +387,7 @@ if (exists $self->{options} && (my $map = $self->{options})) { if (exists $map->{$option_name} && defined(my $value = $map->{$option_name})) { my $feature = $glyph->feature; + return $value unless ref $value eq 'CODE'; my $val = eval { $value->($feature,$option_name,$partno,$total_parts,$glyph)}; warn 
"Error returned while evaluating value of '$option_name' option for glyph $glyph, feature $feature: ",$@,"\n" From cjfields at dev.open-bio.org Wed Mar 5 10:57:35 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 5 Mar 2008 10:57:35 -0500 Subject: [Bioperl-guts-l] [14599] bioperl-db/trunk/Bio/DB/BioSQL/BasePersistenceAdaptor.pm: Throw if no primary key defined ( cases where the persistent object is not created and one attempts to remove in same run ). Message-ID: <200803051557.m25FvZpg015757@dev.open-bio.org> Revision: 14599 Author: cjfields Date: 2008-03-05 10:57:34 -0500 (Wed, 05 Mar 2008) Log Message: ----------- Throw if no primary key defined (cases where the persistent object is not created and one attempts to remove in same run). Modified Paths: -------------- bioperl-db/trunk/Bio/DB/BioSQL/BasePersistenceAdaptor.pm Modified: bioperl-db/trunk/Bio/DB/BioSQL/BasePersistenceAdaptor.pm =================================================================== --- bioperl-db/trunk/Bio/DB/BioSQL/BasePersistenceAdaptor.pm 2008-03-05 05:42:57 UTC (rev 14598) +++ bioperl-db/trunk/Bio/DB/BioSQL/BasePersistenceAdaptor.pm 2008-03-05 15:57:34 UTC (rev 14599) @@ -303,6 +303,8 @@ $self->_remove_from_obj_cache($obj); # obtain primary key my $pk = $obj->primary_key(); + $self->throw("Object of class ".ref($obj)." does not have ". + "a primary key. Have you used \$pobj->create()?") if !defined $pk; # prepared delete statement cached? 
my $cache_key = 'DELETE '.ref($obj->obj()); my $sth = $self->sth($cache_key); From bosborne at dev.open-bio.org Wed Mar 5 11:48:49 2008 From: bosborne at dev.open-bio.org (Brian Osborne) Date: Wed, 5 Mar 2008 11:48:49 -0500 Subject: [Bioperl-guts-l] [14600] bioperl-live/trunk/Bio/SeqIO/entrezgene.pm: Add example Message-ID: <200803051648.m25Gmn7M015939@dev.open-bio.org> Revision: 14600 Author: bosborne Date: 2008-03-05 11:48:49 -0500 (Wed, 05 Mar 2008) Log Message: ----------- Add example Modified Paths: -------------- bioperl-live/trunk/Bio/SeqIO/entrezgene.pm Modified: bioperl-live/trunk/Bio/SeqIO/entrezgene.pm =================================================================== --- bioperl-live/trunk/Bio/SeqIO/entrezgene.pm 2008-03-05 15:57:34 UTC (rev 14599) +++ bioperl-live/trunk/Bio/SeqIO/entrezgene.pm 2008-03-05 16:48:49 UTC (rev 14600) @@ -50,6 +50,26 @@ The C<-debug> and C<-locuslink> options slow down the parser. +Example code which looks for ontology terms: + + my $eio = new Bio::SeqIO(-file => $file, + -format => 'entrezgene', + -service_record => 'yes'); + + while (my $seq = $eio->next_seq) { + my $gid = $seq->accession_number; + foreach my $ot ($ann->get_Annotations('OntologyTerm')) { + next if ($ot->term->authority eq 'STS marker'); # No STS markers + my $evid = $ot->comment; + $evid =~ s/evidence: //i; + my @ref = $ot->term->get_references; + my $id = $ot->identifier; + my $fid = 'GO:' . 
sprintf("%07u",$id); + print join("\t",$gid, $ot->ontology->name, $ot->name, $evid, + $fid, @ref?$ref[0]->medline:''), "\n"; + } + } + =head1 FEEDBACK =head2 Mailing Lists From cjfields at dev.open-bio.org Wed Mar 5 13:12:35 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 5 Mar 2008 13:12:35 -0500 Subject: [Bioperl-guts-l] [14601] bioperl-db/trunk/Bio/DB/BioSQL/SeqAdaptor.pm: bug 2226 Message-ID: <200803051812.m25ICZ5l017601@dev.open-bio.org> Revision: 14601 Author: cjfields Date: 2008-03-05 13:12:35 -0500 (Wed, 05 Mar 2008) Log Message: ----------- bug 2226 Modified Paths: -------------- bioperl-db/trunk/Bio/DB/BioSQL/SeqAdaptor.pm Modified: bioperl-db/trunk/Bio/DB/BioSQL/SeqAdaptor.pm =================================================================== --- bioperl-db/trunk/Bio/DB/BioSQL/SeqAdaptor.pm 2008-03-05 16:48:49 UTC (rev 14600) +++ bioperl-db/trunk/Bio/DB/BioSQL/SeqAdaptor.pm 2008-03-05 18:12:35 UTC (rev 14601) @@ -285,8 +285,10 @@ # there may be features for this seq: search for those having a FK to # the seq my $query = Bio::DB::Query::BioQuery->new( - -datacollections => ["Bio::SeqFeatureI t1"], - -where => ["t1.entire_seq = ?"]); + -datacollections => ["Bio::SeqFeatureI t1"], + -where => ["t1.entire_seq = ?"], + -order => ["t1.rank"], + ); $qres = $self->_feat_adaptor()->find_by_query( $query, -name => "FIND FEATURE BY SEQ", From bugzilla-daemon at portal.open-bio.org Wed Mar 5 13:12:42 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 5 Mar 2008 13:12:42 -0500 Subject: [Bioperl-guts-l] [Bug 2226] wrong retrieval order from seqfeature table In-Reply-To: Message-ID: <200803051812.m25ICggi025716@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2226 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #1 from cjfields at uiuc.edu 
2008-03-05 13:12 EST ------- Confirmed using latest subversion bioperl-db. The query object does take 'order', as it turns out, so I added an "-order => ['t1.rank'] " which works and passes bioperl-db tests. You'll need to update bioperl-db via subversion. I'm closing this out; feel free to reopen it if this doesn't solve the problem. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From cjfields at dev.open-bio.org Wed Mar 5 13:31:19 2008 From: cjfields at dev.open-bio.org (Christopher John Fields) Date: Wed, 5 Mar 2008 13:31:19 -0500 Subject: [Bioperl-guts-l] [14602] bioperl-db/trunk/Bio/DB/BioSQL/BaseDriver.pm: get rid of undefined string warnings in verbose output Message-ID: <200803051831.m25IVJ1C017812@dev.open-bio.org> Revision: 14602 Author: cjfields Date: 2008-03-05 13:31:19 -0500 (Wed, 05 Mar 2008) Log Message: ----------- get rid of undefined string warnings in verbose output Modified Paths: -------------- bioperl-db/trunk/Bio/DB/BioSQL/BaseDriver.pm Modified: bioperl-db/trunk/Bio/DB/BioSQL/BaseDriver.pm =================================================================== --- bioperl-db/trunk/Bio/DB/BioSQL/BaseDriver.pm 2008-03-05 18:12:35 UTC (rev 14601) +++ bioperl-db/trunk/Bio/DB/BioSQL/BaseDriver.pm 2008-03-05 18:31:19 UTC (rev 14602) @@ -126,7 +126,7 @@ "Bio::Ontology::PathI" => "term_path", "Bio::Ontology::Path" => "term_path", "Bio::DB::BioSQL::PathAdaptor" => "term_path", - "Bio::Ontology::OntologyI" => "ontology", + "Bio::Ontology::OntologyI" => "ontology", "Bio::DB::BioSQL::OntologyAdaptor" => "ontology", # TermSynonym is a hack - there is no such object "TermSynonym" => "term_synonym", @@ -1081,10 +1081,11 @@ if($slotmap->{$slots[$i]} && (substr($slotmap->{$slots[$i]},0,2) ne '=>')) { if($adp->verbose > 0) { - $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2). - "::update: ". 
- "binding column $j to \"" . - $slotvals->[$i] . "\" ($slots[$i])\n"); + $adp->debug(sprintf("%s::update: binding column %d to \"%s\"(%s)\n", + substr(ref($adp),rindex(ref($adp),"::")+2), + $j, + $slotvals->[$i] || '', + ($slots[$i]))); } $self->bind_param($sth, $j, $slotvals->[$i]); $j++; From bugzilla-daemon at portal.open-bio.org Wed Mar 5 17:13:34 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 5 Mar 2008 17:13:34 -0500 Subject: [Bioperl-guts-l] [Bug 2280] crash on the attempt to store same sequence in a diff. namespace In-Reply-To: Message-ID: <200803052213.m25MDY1P007089@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2280 ------- Comment #2 from cjfields at uiuc.edu 2008-03-05 17:13 EST ------- I'm not sure how you are using load_seqdatabase.pl here; I think the script by default assumes you are loading new sequences in the database unless you specify options like 'remove', 'update', 'safe', etc., otherwise it dies if dups are possibly being inserted into the database ('safe' just bypasses the errors, and I believe 'remove' and 'update' do what they suggest). The test script you attached also tries to switch the namespace directly by getting the persistent obj from the database, assign it a new namespace, and then store it. The problem with this approach is you are attempting to store the object using the same assigned primary_key (so it would indeed move it, as you're updating the current obj, not a create()). Notably, using create() with a pers. 
object with an assigned primary_key() gets you an error (and a hint): ------------- EXCEPTION: Bio::Root::Exception ------------- MSG: must not change primary_key() once it is set STACK: Error::throw STACK: Bio::Root::Root::throw /Users/cjfields/bioperl/bioperl-live/Bio/Root/Root.pm:357 STACK: Bio::DB::Persistent::PersistentObject::primary_key /Users/cjfields/bioperl/db/Bio/DB/Persistent/PersistentObject.pm:321 STACK: Bio::DB::Persistent::Seq::primary_key /Users/cjfields/bioperl/db/Bio/DB/Persistent/Seq.pm:124 STACK: Bio::DB::BioSQL::BasePersistenceAdaptor::create /Users/cjfields/bioperl/db/Bio/DB/BioSQL/BasePersistenceAdaptor.pm:211 STACK: Bio::DB::Persistent::PersistentObject::create /Users/cjfields/bioperl/db/Bio/DB/Persistent/PersistentObject.pm:244 STACK: test.pl:33 ----------------------------------------------------------- The way I have worked out to do this is to reset the seq object's primary_key() by assigning it undef prior to using store() or create() (which assigns a new primary key for the object, even if it is in the same namespace): # store the found sequence in the second biodatabase: my $pseq = $seqadp->create_persistent($seq); $pseq->namespace($ns2); $pseq->primary_key(undef); $pseq->store(); # assign new primary key $seqadp->commit; This works as long as the sequence namespace doesn't match an already present one. It might be worth adding some tests to make sure remove()-ing one persistent sequence doesn't cause problems with the other sequences in different namespaces. I would also like Hilmar to comment on this as well to see if this is an adequate solution or if there are potential problems. (In reply to comment #0) > Hi All, > > I've found that 'Duplicate entry' crash occurs if I store the same > sequence the second time (but in a different namespace). > > The attached archive contains complete and reproducable > (I believe) example for this issue. 
-- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From bugzilla-daemon at portal.open-bio.org Wed Mar 5 17:13:53 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Wed, 5 Mar 2008 17:13:53 -0500 Subject: [Bioperl-guts-l] [Bug 2281] unable to copy a sequence from one namespace to another one In-Reply-To: Message-ID: <200803052213.m25MDrwI007135@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2281 ------- Comment #3 from cjfields at uiuc.edu 2008-03-05 17:13 EST ------- See bug 2280 -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From lstein at dev.open-bio.org Thu Mar 6 10:38:19 2008 From: lstein at dev.open-bio.org (Lincoln Stein) Date: Thu, 6 Mar 2008 10:38:19 -0500 Subject: [Bioperl-guts-l] [14603] bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm: fixed loading problem that arose when a feature had two parents but feature repeated twice on different lines Message-ID: <200803061538.m26FcJ3U024155@dev.open-bio.org> Revision: 14603 Author: lstein Date: 2008-03-06 10:38:18 -0500 (Thu, 06 Mar 2008) Log Message: ----------- fixed loading problem that arose when a feature had two parents but feature repeated twice on different lines Modified Paths: -------------- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm =================================================================== --- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-05 18:31:19 UTC (rev 14602) +++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-03-06 15:38:18 UTC (rev 14603) @@ -509,7 +509,11 @@ } # contiguous feature, so add a segment - if (defined 
$old_feat) { + if (defined $old_feat && + ($old_feat->seq_id != $refname) && + ($old_feat->start != $start) && + ($old_feat->end != $end) # make sure endpoints are distinct + ) { $self->add_segment($old_feat,$self->sfclass->new(@args)); return; } @@ -692,7 +696,7 @@ my $id = $helper->local2global($load_id); return - $self->subfeatures_normalized || ($helper->indexit($load_id) + ($self->subfeatures_normalized || $helper->indexit($load_id) ? $self->store->fetch($id) : $self->tmp_store->fetch($id) ); From bugzilla-daemon at portal.open-bio.org Thu Mar 6 11:11:15 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 6 Mar 2008 11:11:15 -0500 Subject: [Bioperl-guts-l] [Bug 2413] save_tempfiles() not working in Bio::Tools::Run::WrapperBase In-Reply-To: Message-ID: <200803061611.m26GBFVw006738@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2413 cjfields at uiuc.edu changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution| |FIXED ------- Comment #4 from cjfields at uiuc.edu 2008-03-06 11:11 EST ------- I'm closing this out; the bug fix appears to work fine. Florent, you can reopen this if the fix doesn't work for you. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From sheldon.mckay at gmail.com Thu Mar 6 11:59:08 2008 From: sheldon.mckay at gmail.com (Sheldon McKay) Date: Thu, 6 Mar 2008 11:59:08 -0500 Subject: [Bioperl-guts-l] svn issue? Message-ID: Hi all, I can't seem to do an svn checkout with the incantation below. I tried it out on a couple of servers. Is there a problem at the bioperl end? 
Thanks, Sheldon $ svn co svn://code.open-bio.org/bioperl/bioperl-live/trunk bioperl-live svn: Can't find a temporary directory: Error string not specified yet From cjfields at uiuc.edu Thu Mar 6 12:36:35 2008 From: cjfields at uiuc.edu (Chris Fields) Date: Thu, 6 Mar 2008 11:36:35 -0600 Subject: [Bioperl-guts-l] svn issue? In-Reply-To: References: Message-ID: I've forwarded this to the help desk. Hopefully someone there can answer/fix this. chris On Mar 6, 2008, at 10:59 AM, Sheldon McKay wrote: > Hi all, > > I can't seem to do an svn checkout with the incantation below. I > tried it out on a couple of servers. Is there a problem at the > bioperl end? > > Thanks, > Sheldon > > $ svn co svn://code.open-bio.org/bioperl/bioperl-live/trunk bioperl- > live > svn: Can't find a temporary directory: Error string not specified yet > _______________________________________________ > Bioperl-guts-l mailing list > Bioperl-guts-l at lists.open-bio.org > http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l Christopher Fields Postdoctoral Researcher Lab of Dr. Robert Switzer Dept of Biochemistry University of Illinois Urbana-Champaign From bugzilla-daemon at portal.open-bio.org Thu Mar 6 14:48:56 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 6 Mar 2008 14:48:56 -0500 Subject: [Bioperl-guts-l] [Bug 2466] New: NCBIHelper redirecting RefSeq sequence download to EBI server Message-ID: http://bugzilla.open-bio.org/show_bug.cgi?id=2466 Summary: NCBIHelper redirecting RefSeq sequence download to EBI server Product: BioPerl Version: 1.5 branch Platform: PC OS/Version: Linux Status: NEW Severity: normal Priority: P2 Component: Core Components AssignedTo: bioperl-guts-l at bioperl.org ReportedBy: nsoranzo at tiscali.it In Bio::DB::NCBIHelper sub get_Stream_by_acc calls sub _check_id , which, if the sequence is from RefSeq, redirects the download request using Bio::DB::RefSeq . 
The RefSeq object then uses the EBI server, which is not the primary source for RefSeq sequences. And in fact it gives error for some sequences, like NP_001105557 . NCBI server instead works fine, as can be seen by passing -no_redirect => 1 when creating a GenPept object, then calling get_Seq_by_acc. I'd suggest to eliminate this part # Asking for a RefSeq from EMBL/GenBank unless ($self->no_redirect) { if ($ids =~ /N._/) { $self->warn("[$ids] is not a normal sequence database but a RefSeq entry.". " Redirecting the request.\n") if $self->verbose >= 0; return new Bio::DB::RefSeq; } } from sub _check_id , and also the _no_redirect initialization. In this way, GenPept, GenBank and EntrezGene will simply use NCBI servers for RefSeq sequence queries. -- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From David.Messina at sbc.su.se Thu Mar 6 15:13:38 2008 From: David.Messina at sbc.su.se (Dave Messina) Date: Thu, 6 Mar 2008 21:13:38 +0100 Subject: [Bioperl-guts-l] svn issue? In-Reply-To: References: Message-ID: <628aabb70803061213n61ccbf28s4a65dc80e9dc624f@mail.gmail.com> Hey Sheldon, IIRC this is a problem with svn filling up some temp space on the server. So should be cleared up soon. D From bugzilla-daemon at portal.open-bio.org Thu Mar 6 17:47:25 2008 From: bugzilla-daemon at portal.open-bio.org (bugzilla-daemon at portal.open-bio.org) Date: Thu, 6 Mar 2008 17:47:25 -0500 Subject: [Bioperl-guts-l] [Bug 2413] save_tempfiles() not working in Bio::Tools::Run::WrapperBase In-Reply-To: Message-ID: <200803062247.m26MlPsV014575@portal.open-bio.org> http://bugzilla.open-bio.org/show_bug.cgi?id=2413 ------- Comment #5 from florent.angly at gmail.com 2008-03-06 17:47 EST ------- The fix works for me too. The bug can stay closed! Thanks! 
-- Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- You are the assignee for the bug, or are watching the assignee. From scain at dev.open-bio.org Fri Mar 7 17:19:18 2008 From: scain at dev.open-bio.org (Scott Cain) Date: Fri, 7 Mar 2008 17:19:18 -0500 Subject: [Bioperl-guts-l] [14604] bioperl-live/trunk: adding an option for `perl Build. PL` to accept the default of asked questions. Message-ID: <200803072219.m27MJIC4000890@dev.open-bio.org> Revision: 14604 Author: scain Date: 2008-03-07 17:19:17 -0500 (Fri, 07 Mar 2008) Log Message: ----------- adding an option for `perl Build.PL` to accept the default of asked questions. To use, do `perl Build.PL --accept 1` Modified Paths: -------------- bioperl-live/trunk/Build.PL bioperl-live/trunk/ModuleBuildBioperl.pm Modified: bioperl-live/trunk/Build.PL =================================================================== --- bioperl-live/trunk/Build.PL 2008-03-06 15:38:18 UTC (rev 14603) +++ bioperl-live/trunk/Build.PL 2008-03-07 22:19:17 UTC (rev 14604) @@ -15,8 +15,6 @@ our @drivers; -my $mysql_ok = 0; - # Set up the ModuleBuildBioperl object my $build = ModuleBuildBioperl->new( module_name => 'Bio', @@ -94,7 +92,7 @@ BioDBSeqFeature_mysql => { description => "MySQL tests for Bio::DB::SeqFeature::Store", feature_requires => { 'DBI' => 0, 'DBD::mysql' => 0 }, - test => \&test_db_sf + test => \&test_db }, Network => { description => "Enable tests that need an internet connection", @@ -109,20 +107,19 @@ #script_files => [] # scripts in scripts directory are installed on-demand ); -prompt_for_biodb() if $build->feature('BioDBGFF') || $build->feature('BioDBSeqFeature_mysql'); +my $accept = $build->args->{accept}; # Handle auto features if ($build->feature('BioDBSeqFeature_BDB')) { make_bdb_test(); } -if ($build->feature('BioDBSeqFeature_mysql') && $mysql_ok) { - # will return without doing anything if user chose not to run tests during - # 
prompt_for_biodb() above +if ($build->feature('BioDBSeqFeature_mysql')) { make_dbi_test(); } # Ask questions -$build->choose_scripts; +$build->choose_scripts($accept); +prompt_for_biodbgff($accept) if $build->feature('BioDBGFF'); { if ($build->args('network')) { if ($build->feature('Network')) { @@ -135,7 +132,7 @@ } } else { - prompt_for_network() if $build->feature('Network'); + prompt_for_network($accept) if $build->feature('Network'); } # then in test script: # use Module::Build; @@ -154,11 +151,11 @@ exit; + sub make_bdb_test { my $path0 = File::Spec->catfile('t', 'BioDBSeqFeature.t'); my $path = File::Spec->catfile('t', 'BioDBSeqFeature_BDB.t'); - unlink($path) if (-e $path); - open(my $F, ">", $path) || die "Can't create test file\n"; + open my $F, ">$path"; print $F <add_to_manifest_skip($path); } -sub test_db_sf { +sub test_db { eval {require DBI;}; # if not installed, this sub won't actually be called - @drivers = DBI->available_drivers; - unless (grep {/mysql/i} @drivers) { - $mysql_ok = 0; - return "Only MySQL DBI driver supported for BioDBSeqFeature_mysql tests"; + unless (eval {DBI->connect('dbi:mysql:test',undef,undef,{RaiseError=>0,PrintError=>0})}) { + return "Could not connect to test database"; } - $mysql_ok = 1; return; } sub make_dbi_test { - my $dsn = $build->notes('test_dsn') || return; my $path0 = File::Spec->catfile('t', 'BioDBSeqFeature.t'); my $path = File::Spec->catfile('t', 'BioDBSeqFeature_mysql.t'); - my $test_db = $build->notes('test_db'); - my $user = $build->notes('test_user'); - my $pass = $build->notes('test_pass'); open my $F,">$path"; - my $str = "$path0 -adaptor DBI::mysql -create 1 -temp 1 -dsn $dsn"; - $str .= " -user $user" if $user; - $str .= " -password $pass" if $pass; print $F <add_to_cleanup($path); @@ -206,10 +193,10 @@ return; } -sub prompt_for_biodb { - my $proceed = $build->y_n("Do you want to run the BioDBGFF or ". - "BioDBSeqFeature_mysql live database tests? ". -