[Bioperl-guts-l] bioperl-live/Bio/DB/GFF/Adaptor/dbi pg.pm, 1.17, 1.18
Scott Cain
scain at pub.open-bio.org
Wed Feb 22 16:39:07 EST 2006
Update of /home/repository/bioperl/bioperl-live/Bio/DB/GFF/Adaptor/dbi
In directory pub.open-bio.org:/tmp/cvs-serv3933/Bio/DB/GFF/Adaptor/dbi
Modified Files:
pg.pm
Log Message:
added documentation and a commented out version of the search_notes
method; this is in preparation of commiting another pg adaptor that
implements a fast full text search
Index: pg.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/DB/GFF/Adaptor/dbi/pg.pm,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** pg.pm 10 Feb 2006 14:51:47 -0000 1.17
--- pg.pm 22 Feb 2006 21:39:04 -0000 1.18
***************
*** 5,9 ****
Bio::DB::GFF::Adaptor::dbi::pg -- Database adaptor for a specific postgres schema
! =head1 SYNOPSIS
SQL commands that need to be executed before this adaptor will work:
--- 5,9 ----
Bio::DB::GFF::Adaptor::dbi::pg -- Database adaptor for a specific postgres schema
! =head1 NOTES
SQL commands that need to be executed before this adaptor will work:
***************
*** 25,28 ****
--- 25,72 ----
GRANT SELECT ON TABLE ftype TO nobody;
+ =head2 Optimizing the database
+
+ PostgreSQL generally requires some tuning before you get very good
+ performance for large databases. For general information on tuning
+ a PostgreSQL server, see http://www.varlena.com/GeneralBits/Tidbits/perf.html
+ Of particular importance is executing VACUUM FULL ANALYZE whenever
+ you change the database.
+
+ Additionally, for a GFF database, there are a few items you can tune.
+ For each automatic class in your GBrowse conf file, there will be one
+ or two searches done when searching for a feature. If there are lots
+ of features, these search can take several seconds. To speed these searches,
+ do two things:
+
+ =over
+
+ =item 1
+
+ Set 'enable_seqscan = false' in your postgresql.conf file (and restart
+ your server).
+
+ =item 2
+
+ Create 'partial' indexes for each automatic class, doing this for the
+ example class 'Allele':
+
+ CREATE INDEX partial_allele_gclass ON
+ fgroup (lower('gname')) WHERE gclass='Allele';
+
+ And be sure to run VACUUM FULL ANALYZE after creating the indexes.
+
+ =back
+
+ =head2 search_notes
+
+ This PostgreSQL adaptor does not implement the search notes method
+ because it can be very slow (although the code for the method is
+ contained in this method but commented out).
+ There is, however, a PostgreSQL adaptor that does implement it in
+ a more efficient way: L<Bio::DB::GFF::Adaptor::dbi::pg_fts>,
+ which inherits from this adaptor and uses the optional PostgreSQL
+ module TSearch2 for full text indexing. See that adaptor's
+ documentation for more information.
+
See also L<Bio::DB::GFF>
***************
*** 63,67 ****
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
! WHERE fattribute_to_feature.fattribute_value=?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
--- 107,111 ----
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
! WHERE lower(fattribute_to_feature.fattribute_value)=lower(?)
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
***************
*** 81,85 ****
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
! WHERE fattribute_to_feature.fattribute_value LIKE ?
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
--- 125,129 ----
gname
FROM fdata,fgroup,fattribute,fattribute_to_feature
! WHERE lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
AND fgroup.gclass=?
AND fgroup.gid=fdata.gid
***************
*** 107,110 ****
--- 151,163 ----
;
+ use constant FULLTEXTWILDCARD => <<END;
+ SELECT distinct gclass,gname,fattribute_value
+ FROM fgroup,fattribute_to_feature,fdata
+ WHERE fgroup.gid=fdata.gid
+ AND fdata.fid=fattribute_to_feature.fid
+ AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
+ END
+ ;
+
########################
# moved from mysqlopt.pm
***************
*** 457,460 ****
--- 510,516 ----
CREATE INDEX fattribute_txt_idx ON fattribute_to_feature (fattribute_value)
},
+ fattribute_lower_idx => q{
+ CREATE INDEX fattribute_lower_idx ON fattribute_to_feature (lower(fattribute_value))
+ },
} # fattribute_to_feature indexes
} # fattribute_to_feature
***************
*** 819,822 ****
--- 875,879 ----
$query .= " ORDER BY $order_by" if $order_by;
+ $self->dbh->do('set enable_seqscan=off');
my $sth = $self->dbh->do_query($query, at args);
$sth;
***************
*** 888,891 ****
--- 945,952 ----
=head2 search_notes
+ This method is not available by default--if you have a small
+ database or very powerful hardware, you can uncomment the
+ method to use it.
+
Title : search_notes
Usage : @search_results = $db->search_notes("full text search string",$limit)
***************
*** 895,900 ****
Status : public
! This is a mysql-specific method. Given a search string, it performs a
! full-text search of the notes table and returns an array of results.
Each row of the returned array is a arrayref containing the following fields:
--- 956,961 ----
Status : public
! This is a replacement for the mysql-specific method. Given a search string, it
! performs a ILIKE search of the notes table and returns an array of results.
Each row of the returned array is a arrayref containing the following fields:
***************
*** 903,907 ****
--- 964,1000 ----
column 3 A relevance score.
+ Note that for large databases this can be very slow and may result in
+ time out or 500-cgi errors. If this is happening on a regular basis,
+ you should look into using L<Bio::DB::GFF::Adaptor::dbi::pg_fts> which
+ implements the TSearch2 full text indexing scheme.
+
=cut
+
+ sub search_notes{
+ # my $self = shift;
+ # my ($search_string,$limit) = @_;
+ #
+ # $search_string =~ tr/*/%/s;
+ # $search_string = '%'.$search_string unless $search_string =~ /^\%/;
+ # $search_string = $search_string.'%' unless $search_string =~ /\%$/;
+ # warn "search_string:$search_string";
+ # my $query = FULLTEXTWILDCARD;
+ # $query .= " limit $limit" if defined $limit;
+ # my $sth = $self->dbh->do_query($query,$search_string);
+ #
+ # my @results;
+ # while (my ($class,$name,$note) = $sth->fetchrow_array) {
+ #
+ # next unless $class && $name; # sorry, ignore NULL objects
+ # my $featname = Bio::DB::GFF::Featname->new($class=>$name);
+ #
+ # push @results,[$featname,$note,0]; #gbrowse expects a score, but
+ # #pg doesn't give one, thus the 0
+ # }
+ # warn @results;
+ #
+ # return @results;
+ }
+
=head2 make_meta_set_query
More information about the Bioperl-guts-l
mailing list