[Bioperl-guts-l] [15625] bioperl-live/trunk/Bio: backed out changes to primary_id that broke seqfeature::store

Lincoln Stein lstein at dev.open-bio.org
Mon Apr 6 13:24:59 EDT 2009


Revision: 15625
Author:   lstein
Date:     2009-04-06 13:24:58 -0400 (Mon, 06 Apr 2009)

Log Message:
-----------
backed out changes to primary_id that broke seqfeature::store

Modified Paths:
--------------
    bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm
    bioperl-live/trunk/Bio/SeqFeature/Lite.pm

Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm	2009-04-04 08:57:40 UTC (rev 15624)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm	2009-04-06 17:24:58 UTC (rev 15625)
@@ -12,7 +12,7 @@
 use File::Path 'rmtree','mkpath';
 use File::Basename;
 use File::Spec;
-use Carp 'carp';
+use Carp 'carp','croak';
 
 use constant BINSIZE => 10_000;
 use constant MININT  => -999_999_999_999;
@@ -61,7 +61,7 @@
 
   # use the GFF3 loader to do a bulk write:
   my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store   => $db,
-                                                           -verbose => 1,
+                                                           -verbose => 0,
                                                            -fast    => 1);
   $loader->load('/home/fly4.3/dmel-all.gff');
 
@@ -189,6 +189,9 @@
  -temp              Pass true to create temporary index files that will
                     be deleted once the script exits.
 
+ -verbose           Pass true to report autoindexing operations on STDERR.
+                    (default is true).
+
 Examples:
 
 To create an empty database which will be populated using calls to
@@ -257,6 +260,8 @@
 		    'VERBOSE'
 		  ], at _);
 
+  $verbose = 1 unless defined $verbose;
+
   if ($autoindex) {
     -d $autoindex or $self->throw("Invalid directory $autoindex");
     $directory ||= "$autoindex/indexes";
@@ -301,13 +306,15 @@
     my $autodir = shift;
     my $result  = $self->needs_auto_reindexing($autodir);
 
-    if (%$result) {
+    if ($result && %$result) {
+	$self->flag_autoindexing(1);
 	$self->lock('exclusive');
 	$self->reindex_wigfiles($result->{wig},$autodir)  if $result->{wig};
 	$self->reindex_ffffiles($result->{fff},$autodir)  if $result->{fff};
 	$self->reindex_gfffiles($result->{gff},$autodir) if $result->{gff};
 	$self->dna_db(Bio::DB::Fasta::Subdir->new($autodir));
 	$self->unlock;
+	$self->flag_autoindexing(0);
     }
 
     else {
@@ -315,6 +322,47 @@
     }
 }
 
+sub autoindex_flagfile { 
+    return File::Spec->catfile(shift->directory,'autoindex.pid');
+}
+sub auto_index_in_process {
+    my $self = shift;
+    my $flag_file = $self->autoindex_flagfile;
+    return unless -e $flag_file;
+
+    # if flagfile exists, then check that PID still exists
+    open my $fh,$flag_file or die "Couldn't open $flag_file: $!";
+    my $pid = <$fh>;
+    close $fh;
+    return 1 if kill 0=>$pid;
+    warn "Autoindexing seems to be running in another process, but the process has gone away. Trying to override...";
+    if (unlink $flag_file) {
+	warn "Successfully removed stale PID file." if $self->verbose;
+	warn "Assuming partial reindexing process. Rebuilding indexes from scratch..." if $self->verbose;
+	my $glob = File::Spec->catfile($self->directory,'*');
+	unlink glob($glob);
+	return;
+    } else {
+	croak ("Cannot recover from apparent aborted autoindex process. Please remove files in ",
+	     $self->directory,
+	     " and allow the adaptor to reindex");
+	return 1;
+    }
+}
+
+sub flag_autoindexing {
+    my $self = shift;
+    my $doit = shift;
+    my $flag_file = $self->autoindex_flagfile;
+    if ($doit) {
+	open my $fh,'>',$flag_file or die "Couldn't open $flag_file: $!";
+	print $fh $$;
+	close $fh;
+    } else {
+	unlink $flag_file;
+    }
+}
+
 sub reindex_gfffiles {
     my $self    = shift;
     my $files   = shift;
@@ -323,11 +371,13 @@
     warn "Reindexing GFF files...\n" if $self->verbose;
     $self->_permissions(1,1);
     $self->_close_databases();
-    $self->_open_databases(1,1);
+    $self->_open_databases(1,0);
     require Bio::DB::SeqFeature::Store::GFF3Loader
 	unless Bio::DB::SeqFeature::Store::GFF3Loader->can('new');
     my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store    => $self,
-							     -sf_class => $self->seqfeature_class) 
+							     -sf_class => $self->seqfeature_class,
+							     -verbose  => $self->verbose,
+	) 
 	or $self->throw("Couldn't create GFF3Loader");
     my %seen;
     $loader->load(grep {!$seen{$_}++} @$files);
@@ -346,7 +396,9 @@
     require Bio::DB::SeqFeature::Store::FeatureFileLoader
 	unless Bio::DB::SeqFeature::Store::FeatureFileLoader->can('new');
     my $loader = Bio::DB::SeqFeature::Store::FeatureFileLoader->new(-store    => $self,
-								    -sf_class => $self->seqfeature_class) 
+								    -sf_class => $self->seqfeature_class,
+								    -verbose  => $self->verbose,
+	) 
 	or $self->throw("Couldn't create FeatureFileLoader");
     my %seen;
     $loader->load(grep {!$seen{$_}++} @$files);
@@ -401,6 +453,9 @@
     my $autodir = shift;
     my $result    = {};
 
+    # don't allow two processes to reindex simultaneously
+	$self->auto_index_in_process and croak "Autoindexing in process. Try again later";
+
     # first interrogate the GFF3 files, using the timestamp file
     # as modification comparison
     my (@gff3, at fff, at wig,$fasta,$fasta_index_time);
@@ -408,6 +463,7 @@
 	or $self->throw("Couldn't open directory $autodir for reading: $!");
 
     my $maxtime   = 0;
+    my $timestamp_time  = _mtime($self->_mtime_path) || 0;
     while (defined (my $node = readdir($D))) {
 	next if $node =~ /^\./;
 	my $path      = File::Spec->catfile($autodir,$node);
@@ -417,6 +473,7 @@
 	    my $mtime = _mtime(\*_);  # not a typo
 	    $maxtime   = $mtime if $mtime > $maxtime;
 	    push @gff3,$path;
+#	    push @gff3,$path if $mtime > $timestamp_time;
 	}
 	
 	
@@ -424,6 +481,7 @@
 	    my $mtime = _mtime(\*_);  # not a typo
 	    $maxtime   = $mtime if $mtime > $maxtime;
 	    push @fff,$path;
+#	    push @fff,$path if $mtime > $timestamp_time;
 	}
 	
 	elsif ($path =~ /\.wig$/i) {
@@ -443,9 +501,9 @@
 	}
     }
     closedir $D;
-    my $timestamp_time  = _mtime($self->_mtime_path) || 0;
     
     $result->{gff}     = \@gff3 if $maxtime > $timestamp_time;
+#    $result->{gff}     = \@gff3 if @gff3;
     $result->{wig}     = \@wig  if @wig;
     $result->{fff}     = \@fff  if @fff;
     $result->{fasta}++ if $fasta;

Modified: bioperl-live/trunk/Bio/SeqFeature/Lite.pm
===================================================================
--- bioperl-live/trunk/Bio/SeqFeature/Lite.pm	2009-04-04 08:57:40 UTC (rev 15624)
+++ bioperl-live/trunk/Bio/SeqFeature/Lite.pm	2009-04-06 17:24:58 UTC (rev 15625)
@@ -514,8 +514,9 @@
   my $self = shift;
   my $d = $self->{primary_id};
   $self->{primary_id} = shift if @_;
-  return $d if defined $d;
-  return (overload::StrVal($self) =~ /0x([a-f0-9]+)/)[0];
+  return $d;
+#  return $d if defined $d;
+#  return (overload::StrVal($self) =~ /0x([a-f0-9]+)/)[0];
 }
 
 sub notes {
@@ -690,7 +691,7 @@
 
     if ($recurse) {
 	$self->_traverse($parent_tree) unless %$parent_tree;  # this will record parents of all children
-	my $primary_id = defined $force_id ? $force_id : $self->primary_id;
+	my $primary_id = defined $force_id ? $force_id : $self->_real_or_dummy_id;
 
 	return if $seenit->{$primary_id}++;
 
@@ -728,13 +729,20 @@
 		map {$_->gff3_string(1,$parent_tree,$seenit)} @rsf);
 }
 
+sub _real_or_dummy_id {
+    my $self = shift;
+    my $id   = $self->primary_id;
+    return $id if defined $id;
+    return return (overload::StrVal($self) =~ /0x([a-f0-9]+)/)[0];
+}
+
 sub _traverse {
     my $self   = shift;
     my $tree   = shift;  # tree => {$child}{$parent} = 1
     my $parent = shift;
-    my $id     = $self->primary_id;
+    my $id     = $self->_real_or_dummy_id;
     defined $id or return;
-    $tree->{$id}{$parent->primary_id}++ if $parent;
+    $tree->{$id}{$parent->_real_or_dummy_id}++ if $parent;
     $_->_traverse($tree,$self) foreach $self->get_SeqFeatures;
 }
 
@@ -808,7 +816,7 @@
     my @values = $self->each_tag_value($t);
     push @result,join '=',$self->escape($t),join(',', map {$self->escape($_)} @values) if @values;
   }
-  my $id        = $self->escape($self->primary_id) || $fallback_id;
+  my $id        = $self->escape($self->_real_or_dummy_id) || $fallback_id;
 
   my $parent_id;
   if (@$parent) {




More information about the Bioperl-guts-l mailing list