[Biojava-l] ontology exception, addSequence & BioSQLSequenceDB
Matthew Pocock
matthew_pocock at yahoo.co.uk
Tue Oct 14 13:03:23 EDT 2003
Matthew Pocock wrote:
> More news when I have it.
I have more news :)
The version in CVS works for me. I had to do some work to the
OntologySQL class, and modify the biosql schema (which I've attached).
In the end, I removed the symptom by dissabling connection re-use in
JDBCConnectionPool. Could we either a) make JDBCConnectionPool work with
rollback gracefully or b) switch to a standard connection pool API? My
vote is for option 2, but then some of the things I read via google
indicated that we may have the same problems.
BioSQL People - could you check the single table I've added
(term_relationship_term) and see if it's sane for folding back into the
trunk of biosql development?
Matthew
-------------- next part --------------
-- $Id: biosqldb-pg.sql,v 1.2 2003/10/14 09:59:34 nmrp3 Exp $
--
-- conventions:
-- <table_name>_id is primary internal id (usually autogenerated)
-- Authors: Ewan Birney, Elia Stupka
-- Contributors: Hilmar Lapp, Aaron Mackey, Matthew Pocock
--
-- Copyright Ewan Birney. You may use, modify, and distribute this code under
-- the same terms as Perl. See the Perl Artistic License.
--
-- comments to biosql - biosql-l at open-bio.org
--
-- Migration of the MySQL schema to InnoDB by Hilmar Lapp <hlapp at gmx.net>
-- Post-Cape Town changes by Hilmar Lapp.
-- Singapore changes by Hilmar Lapp and Aaron Mackey.
--
-- database have bioentries. That is about it.
-- we do not store different versions of a database as different dbids
-- (there is no concept of versions of database). There is a concept of
-- versions of entries. Versions of databases deserve their own table and
-- join to bioentry table for tracking with versions of entries
CREATE SEQUENCE biodatabase_pk_seq;
CREATE TABLE biodatabase (
biodatabase_id INTEGER DEFAULT nextval ( 'biodatabase_pk_seq' ) NOT NULL ,
name VARCHAR ( 128 ) NOT NULL ,
authority VARCHAR ( 128 ) ,
description TEXT ,
PRIMARY KEY ( biodatabase_id ) ,
UNIQUE ( name ) ) ;
CREATE INDEX db_auth on biodatabase ( authority );
-- we could insist that taxa are NCBI taxon id, but on reflection I made this
-- an optional extra line, as many flat file formats do not have the NCBI id
--
-- no organelle/sub species
-- corresponds to the node table of the NCBI taxonomy databaase
CREATE SEQUENCE taxon_pk_seq;
CREATE TABLE taxon (
taxon_id INTEGER DEFAULT nextval ( 'taxon_pk_seq' ) NOT NULL ,
ncbi_taxon_id INTEGER ,
parent_taxon_id INTEGER ,
node_rank VARCHAR ( 32 ) ,
genetic_code SMALLINT ,
mito_genetic_code SMALLINT ,
left_value INTEGER ,
right_value INTEGER ,
PRIMARY KEY ( taxon_id ) ,
CONSTRAINT XAKtaxon_ncbi_taxon_id UNIQUE ( ncbi_taxon_id ) ,
CONSTRAINT XAKtaxon_left_value UNIQUE ( left_value ) ,
CONSTRAINT XAKtaxon_right_value UNIQUE ( right_value ) ) ;
CREATE INDEX taxparent ON taxon ( parent_taxon_id );
-- corresponds to the names table of the NCBI taxonomy databaase
CREATE TABLE taxon_name (
taxon_id INTEGER NOT NULL ,
name VARCHAR ( 255 ) NOT NULL ,
name_class VARCHAR ( 32 ) NOT NULL ,
UNIQUE ( name , name_class, taxon_id ) ) ;
CREATE INDEX taxnametaxonid ON taxon_name ( taxon_id );
CREATE INDEX taxnamename ON taxon_name ( name );
-- this is the namespace (controlled vocabulary) ontology terms live in
-- we chose to have a separate table for this instead of reusing biodatabase
CREATE SEQUENCE ontology_pk_seq;
CREATE TABLE ontology (
ontology_id INTEGER DEFAULT nextval ( 'ontology_pk_seq' ) NOT NULL ,
name VARCHAR ( 32 ) NOT NULL ,
definition TEXT ,
PRIMARY KEY ( ontology_id ) ,
UNIQUE ( name ) ) ;
-- any controlled vocab term, everything from full ontology
-- terms eg GO IDs to the various keys allowed as qualifiers
CREATE SEQUENCE term_pk_seq;
CREATE TABLE term (
term_id INTEGER DEFAULT nextval ( 'term_pk_seq' ) NOT NULL ,
name VARCHAR ( 255 ) NOT NULL ,
definition TEXT ,
identifier VARCHAR ( 40 ) ,
is_obsolete CHAR ( 1 ) ,
ontology_id INTEGER NOT NULL ,
PRIMARY KEY ( term_id ) ,
UNIQUE ( name , ontology_id ) ,
UNIQUE ( identifier ) ) ;
CREATE INDEX term_ont ON term ( ontology_id );
-- ontology terms have synonyms, here is how to store them
CREATE TABLE term_synonym (
synonym VARCHAR(255) NOT NULL,
term_id INTEGER NOT NULL,
PRIMARY KEY ( term_id , synonym ) ) ;
-- ontology terms to dbxref association: ontology terms have dbxrefs
CREATE TABLE term_dbxref (
term_id INTEGER NOT NULL ,
dbxref_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( term_id , dbxref_id ) ) ;
CREATE INDEX trmdbxref_dbxrefid ON term_dbxref ( dbxref_id );
-- relationship between controlled vocabulary / ontology term
-- we use subject/predicate/object but this could also
-- be thought of as child/relationship-type/parent.
-- the subject/predicate/object naming is better as we
-- can think of the graph as composed of statements.
--
-- we also treat the relationshiptypes / predicates as
-- controlled terms in themselves; this is quite useful
-- as a lot of systems (eg GO) will soon require
-- ontologies of relationship types (eg subtle differences
-- in the partOf relationship)
--
-- this table probably won't be filled for a while, the core
-- will just treat ontologies as flat lists of terms
CREATE SEQUENCE term_relationship_pk_seq;
CREATE TABLE term_relationship (
term_relationship_id INTEGER DEFAULT nextval ( 'term_relationship_pk_seq' ) NOT NULL ,
subject_term_id INTEGER NOT NULL ,
predicate_term_id INTEGER NOT NULL ,
object_term_id INTEGER NOT NULL ,
ontology_id INTEGER NOT NULL ,
PRIMARY KEY ( term_relationship_id ) ,
UNIQUE ( subject_term_id , predicate_term_id , object_term_id , ontology_id ) ) ;
CREATE INDEX trmrel_predicateid ON term_relationship ( predicate_term_id );
CREATE INDEX trmrel_objectid ON term_relationship ( object_term_id );
CREATE INDEX trmrel_ontid ON term_relationship ( ontology_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX trmrel_subjectid ON term_relationship(subject_term_id);
-- Link a triple to a term, giving it a unique identifier. This allows
-- arbitrary expressions to be built up by then using the linked-to term
-- in another triple.
--
-- e.g. implies(isa(x, y), hasa(x, z))
CREATE TABLE term_relationship_term (
term_relationship_id INTEGER NOT NULL,
term_id INTEGER NOT NULL,
PRIMARY KEY ( term_relationship_id, term_id ),
UNIQUE ( term_relationship_id ),
UNIQUE ( term_id ) );
-- the infamous transitive closure table on ontology term relationships
-- this is a warehouse approach - you will need to update this regularly
--
-- the triple of (subject, predicate, object) is the same as for ontology
-- relationships, with the exception of predicate being the greatest common
-- denominator of the relationships types visited in the path (i.e., if
-- relationship type A is-a relationship type B, the greatest common
-- denominator for path containing both types A and B is B)
--
-- See the GO database or Chado schema for other (and possibly better
-- documented) implementations of the transitive closure table approach.
CREATE SEQUENCE term_path_pk_seq;
CREATE TABLE term_path (
term_path_id INTEGER DEFAULT nextval ( 'term_path_pk_seq' ) NOT NULL ,
subject_term_id INTEGER NOT NULL ,
predicate_term_id INTEGER NOT NULL ,
object_term_id INTEGER NOT NULL ,
ontology_id INTEGER NOT NULL ,
distance INTEGER ,
PRIMARY KEY (term_path_id),
UNIQUE ( subject_term_id , predicate_term_id , object_term_id , ontology_id , distance ) ) ;
CREATE INDEX trmpath_predicateid ON term_path ( predicate_term_id );
CREATE INDEX trmpath_objectid ON term_path ( object_term_id );
CREATE INDEX trmpath_ontid ON term_path ( ontology_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX trmpath_subjectid ON term_path(subject_term_id);
-- we can be a bioentry without a biosequence, but not visa-versa
-- most things are going to be keyed off bioentry_id
--
-- accession is the stable id, display_id is a potentially volatile,
-- human readable name.
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
-- not all entries have a taxon, but many do.
--
-- one bioentry only has one taxon! (weirdo chimerias are not handled. tough)
--
-- Name maps to display_id in bioperl. We have a different column name
-- here to avoid confusion with the naming convention for foreign keys.
CREATE SEQUENCE bioentry_pk_seq;
CREATE TABLE bioentry (
bioentry_id INTEGER DEFAULT nextval ( 'bioentry_pk_seq' ) NOT NULL ,
biodatabase_id INTEGER NOT NULL ,
taxon_id INTEGER ,
name VARCHAR ( 40 ) NOT NULL ,
accession VARCHAR ( 40 ) NOT NULL ,
identifier VARCHAR ( 40 ) ,
division VARCHAR ( 6 ) ,
description TEXT ,
version INTEGER NOT NULL ,
PRIMARY KEY ( bioentry_id ) ,
UNIQUE ( accession , biodatabase_id , version ) ,
UNIQUE ( identifier ) ) ;
CREATE INDEX bioentry_name ON bioentry ( name );
CREATE INDEX bioentry_db ON bioentry ( biodatabase_id );
CREATE INDEX bioentry_tax ON bioentry ( taxon_id );
--
-- bioentry-bioentry relationships: these are typed
--
CREATE SEQUENCE bioentry_relationship_pk_seq;
CREATE TABLE bioentry_relationship (
bioentry_relationship_id INTEGER DEFAULT nextval ( 'bioentry_relationship_pk_seq' ) NOT NULL ,
object_bioentry_id INTEGER NOT NULL ,
subject_bioentry_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( bioentry_relationship_id ) ,
UNIQUE ( object_bioentry_id , subject_bioentry_id , term_id ) ) ;
CREATE INDEX bioentryrel_trm ON bioentry_relationship ( term_id );
CREATE INDEX bioentryrel_child ON bioentry_relationship ( subject_bioentry_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentryrel_parent ON bioentry_relationship(object_bioentry_id);
-- for deep (depth > 1) bioentry relationship trees we need a transitive
-- closure table too
CREATE TABLE bioentry_path (
object_bioentry_id INTEGER NOT NULL ,
subject_bioentry_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
distance INTEGER,
UNIQUE ( object_bioentry_id , subject_bioentry_id , term_id , distance ) ) ;
CREATE INDEX bioentrypath_trm ON bioentry_path ( term_id );
CREATE INDEX bioentrypath_child ON bioentry_path ( subject_bioentry_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentrypath_parent ON bioentry_path(object_bioentry_id);
-- some bioentries will have a sequence
-- biosequence because sequence is sometimes a reserved word
CREATE TABLE biosequence (
bioentry_id INTEGER NOT NULL ,
version INTEGER ,
length INTEGER ,
alphabet VARCHAR ( 10 ) ,
seq TEXT ,
PRIMARY KEY ( bioentry_id ) ) ;
-- add these only if you want them:
-- ALTER TABLE biosequence ADD COLUMN ( isoelec_pt NUMERIC(4,2) );
-- ALTER TABLE biosequence ADD COLUMN ( mol_wgt DOUBLE PRECISION );
-- ALTER TABLE biosequence ADD COLUMN ( perc_gc DOUBLE PRECISION );
-- database cross-references (e.g., GenBank:AC123456.1)
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
CREATE SEQUENCE dbxref_pk_seq;
CREATE TABLE dbxref (
dbxref_id INTEGER DEFAULT nextval ( 'dbxref_pk_seq' ) NOT NULL ,
dbname VARCHAR ( 40 ) NOT NULL ,
accession VARCHAR ( 40 ) NOT NULL ,
version INTEGER NOT NULL ,
PRIMARY KEY ( dbxref_id ) ,
UNIQUE ( accession , dbname , version ) ) ;
CREATE INDEX dbxref_db ON dbxref ( dbname );
-- for roundtripping embl/genbank, we need to have the "optional ID"
-- for the dbxref.
--
-- another use of this table could be for storing
-- descriptive text for a dbxref. for example, we may want to
-- know stuff about the interpro accessions we store (without
-- importing all of interpro), so we can attach the text
-- description as a synonym
CREATE TABLE dbxref_qualifier_value (
dbxref_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER NOT NULL DEFAULT 0 ,
value TEXT ,
PRIMARY KEY ( dbxref_id , term_id , rank ) ) ;
CREATE INDEX dbxrefqual_dbx ON dbxref_qualifier_value ( dbxref_id );
CREATE INDEX dbxrefqual_trm ON dbxref_qualifier_value ( term_id );
-- Direct dblinks. It is tempting to do this
-- from bioentry_id to bioentry_id. But that wont work
-- during updates of one database - we will have to edit
-- this table each time. Better to do the join through accession
-- and db each time. Should be almost as cheap
CREATE TABLE bioentry_dbxref (
bioentry_id INTEGER NOT NULL ,
dbxref_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( bioentry_id , dbxref_id ) ) ;
CREATE INDEX dblink_dbx ON bioentry_dbxref ( dbxref_id );
-- We can have multiple references per bioentry, but one reference
-- can also be used for the same bioentry.
--
-- No two references can reference the same reference database entry
-- (dbxref_id). This is where the MEDLINE id goes: PUBMED:123456.
CREATE SEQUENCE reference_pk_seq;
CREATE TABLE reference (
reference_id INTEGER DEFAULT nextval ( 'reference_pk_seq' ) NOT NULL ,
dbxref_id INTEGER ,
location TEXT NOT NULL ,
title TEXT ,
authors TEXT NOT NULL ,
crc VARCHAR ( 32 ) ,
PRIMARY KEY ( reference_id ) ,
UNIQUE ( dbxref_id ) ,
UNIQUE ( crc ) ) ;
-- bioentry to reference associations
CREATE TABLE bioentry_reference (
bioentry_id INTEGER NOT NULL ,
reference_id INTEGER NOT NULL ,
start_pos INTEGER ,
end_pos INTEGER ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( bioentry_id , reference_id , rank ) ) ;
CREATE INDEX bioentryref_ref ON bioentry_reference ( reference_id );
-- We can have multiple comments per seqentry, and
-- comments can have embedded '\n' characters
CREATE SEQUENCE comment_pk_seq;
CREATE TABLE comment (
comment_id INTEGER DEFAULT nextval ( 'comment_pk_seq' ) NOT NULL ,
bioentry_id INTEGER NOT NULL ,
comment_text TEXT NOT NULL ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( comment_id ) ,
UNIQUE ( bioentry_id , rank ) ) ;
-- tag/value and ontology term annotation for bioentries goes here
CREATE TABLE bioentry_qualifier_value (
bioentry_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
value TEXT ,
rank INTEGER NOT NULL DEFAULT 0 ,
UNIQUE ( bioentry_id , term_id , rank ) ) ;
CREATE INDEX bioentryqual_trm ON bioentry_qualifier_value ( term_id );
-- feature table. We cleanly handle
-- - simple locations
-- - split locations
-- - split locations on remote sequences
CREATE SEQUENCE seqfeature_pk_seq;
CREATE TABLE seqfeature (
seqfeature_id INTEGER DEFAULT nextval ( 'seqfeature_pk_seq' ) NOT NULL ,
bioentry_id INTEGER NOT NULL ,
type_term_id INTEGER NOT NULL ,
source_term_id INTEGER NOT NULL ,
display_name VARCHAR ( 64 ) ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( seqfeature_id ) ,
UNIQUE ( bioentry_id , type_term_id , source_term_id , rank ) ) ;
CREATE INDEX seqfeature_trm ON seqfeature ( type_term_id );
CREATE INDEX seqfeature_fsrc ON seqfeature ( source_term_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeature_bioentryid ON seqfeature(bioentry_id);
-- seqfeatures can be arranged in containment hierarchies.
-- one can imagine storing other relationships between features,
-- in this case the term_id can be used to type the relationship
CREATE SEQUENCE seqfeature_relationship_pk_seq;
CREATE TABLE seqfeature_relationship (
seqfeature_relationship_id INTEGER DEFAULT nextval ( 'seqfeature_relationship_pk_seq' ) NOT NULL ,
object_seqfeature_id INTEGER NOT NULL ,
subject_seqfeature_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( seqfeature_relationship_id ) ,
UNIQUE ( object_seqfeature_id , subject_seqfeature_id , term_id ) ) ;
CREATE INDEX seqfeaturerel_trm ON seqfeature_relationship ( term_id );
CREATE INDEX seqfeaturerel_child ON seqfeature_relationship ( subject_seqfeature_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_relationship(object_seqfeature_id);
-- for deep (depth > 1) seqfeature relationship trees we need a transitive
-- closure table too
CREATE TABLE seqfeature_path (
object_seqfeature_id INTEGER NOT NULL ,
subject_seqfeature_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
distance INTEGER,
UNIQUE ( object_seqfeature_id , subject_seqfeature_id , term_id , distance ) ) ;
CREATE INDEX seqfeaturepath_trm ON seqfeature_path ( term_id );
CREATE INDEX seqfeaturepath_child ON seqfeature_path ( subject_seqfeature_id );
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_path(object_seqfeature_id);
-- tag/value associations - or ontology annotations
CREATE TABLE seqfeature_qualifier_value (
seqfeature_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
rank INTEGER NOT NULL DEFAULT 0 ,
value TEXT NOT NULL ,
PRIMARY KEY ( seqfeature_id , term_id , rank ) ) ;
CREATE INDEX seqfeaturequal_trm ON seqfeature_qualifier_value ( term_id );
-- DBXrefs for features. This is necessary for genome oriented viewpoints,
-- where you have a few have long sequences (contigs, or chromosomes) with many
-- features on them. In that case the features are the semantic scope for
-- their annotation bundles, not the bioentry they are attached to.
CREATE TABLE seqfeature_dbxref (
seqfeature_id INTEGER NOT NULL ,
dbxref_id INTEGER NOT NULL ,
rank INTEGER ,
PRIMARY KEY ( seqfeature_id , dbxref_id ) ) ;
CREATE INDEX feadblink_dbx ON seqfeature_dbxref ( dbxref_id );
-- basically we model everything as potentially having
-- any number of locations, ie, a split location. SimpleLocations
-- just have one location. We need to have a location id for the qualifier
-- associations of fuzzy locations.
--
-- please do not try to model complex assemblies with this thing. It wont
-- work. Check out the ensembl schema for this.
--
-- we allow nulls for start/end - this is useful for fuzzies as
-- standard range queries will not be included
--
-- for remote locations, the join to make is to DBXref
--
-- the FK to term is a possibility to store the type of the
-- location for determining in one hit whether it's a fuzzy or not
CREATE SEQUENCE location_pk_seq;
CREATE TABLE location (
location_id INTEGER DEFAULT nextval ( 'location_pk_seq' ) NOT NULL ,
seqfeature_id INTEGER NOT NULL ,
dbxref_id INTEGER ,
term_id INTEGER ,
start_pos INTEGER ,
end_pos INTEGER ,
strand INTEGER ,
rank INTEGER NOT NULL DEFAULT 0 ,
PRIMARY KEY ( location_id ) ,
UNIQUE ( seqfeature_id , rank ) ) ;
CREATE INDEX seqfeatureloc_start ON location ( start_pos, end_pos );
CREATE INDEX seqfeatureloc_dbx ON location ( dbxref_id );
CREATE INDEX seqfeatureloc_trm ON location ( term_id );
-- location qualifiers - mainly intended for fuzzies but anything
-- can go in here
-- some controlled vocab terms have slots;
-- fuzzies could be modeled as min_start(5), max_start(5)
--
-- there is no restriction on extending the fuzzy ontology
-- for your own nefarious aims, although the bio* apis will
-- most likely ignore these
CREATE TABLE location_qualifier_value (
location_id INTEGER NOT NULL ,
term_id INTEGER NOT NULL ,
value VARCHAR ( 255 ) NOT NULL ,
int_value INTEGER ,
PRIMARY KEY ( location_id , term_id ) ) ;
CREATE INDEX locationqual_trm ON location_qualifier_value ( term_id );
--
-- Create the foreign key constraints
--
-- ontology term
ALTER TABLE term ADD CONSTRAINT FKont_term
FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
ON DELETE CASCADE ;
-- term synonyms
ALTER TABLE term_synonym ADD CONSTRAINT FKterm_syn
FOREIGN KEY ( term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
-- term_dbxref
ALTER TABLE term_dbxref ADD CONSTRAINT FKdbxref_trmdbxref
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
ALTER TABLE term_dbxref ADD CONSTRAINT FKterm_trmdbxref
FOREIGN KEY ( term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
-- term_relationship
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmsubject_trmrel
FOREIGN KEY ( subject_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmpredicate_trmrel
FOREIGN KEY ( predicate_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmobject_trmrel
FOREIGN KEY ( object_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_relationship ADD CONSTRAINT FKontology_trmrel
FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
ON DELETE CASCADE ;
-- term_path
ALTER TABLE term_path ADD CONSTRAINT FKtrmsubject_trmpath
FOREIGN KEY ( subject_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_path ADD CONSTRAINT FKtrmpredicate_trmpath
FOREIGN KEY ( predicate_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_path ADD CONSTRAINT FKtrmobject_trmpath
FOREIGN KEY ( object_term_id ) REFERENCES term ( term_id )
ON DELETE CASCADE ;
ALTER TABLE term_path ADD CONSTRAINT FKontology_trmpath
FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
ON DELETE CASCADE ;
-- taxon, taxon_name
-- unfortunately, we can't constrain parent_taxon_id as it is violated
-- occasionally by the downloads available from NCBI
-- ALTER TABLE taxon ADD CONSTRAINT FKtaxon_taxon
-- FOREIGN KEY ( parent_taxon_id ) REFERENCES taxon ( taxon_id )
-- DEFERRABLE;
ALTER TABLE taxon_name ADD CONSTRAINT FKtaxon_taxonname
FOREIGN KEY ( taxon_id ) REFERENCES taxon ( taxon_id )
ON DELETE CASCADE ;
-- bioentry
ALTER TABLE bioentry ADD CONSTRAINT FKtaxon_bioentry
FOREIGN KEY ( taxon_id ) REFERENCES taxon ( taxon_id ) ;
ALTER TABLE bioentry ADD CONSTRAINT FKbiodatabase_bioentry
FOREIGN KEY ( biodatabase_id ) REFERENCES biodatabase ( biodatabase_id ) ;
-- bioentry_relationship
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKterm_bioentryrel
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKparentent_bioentryrel
FOREIGN KEY ( object_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKchildent_bioentryrel
FOREIGN KEY ( subject_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- bioentry_path
ALTER TABLE bioentry_path ADD CONSTRAINT FKterm_bioentrypath
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE bioentry_path ADD CONSTRAINT FKparentent_bioentrypath
FOREIGN KEY ( object_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_path ADD CONSTRAINT FKchildent_bioentrypath
FOREIGN KEY ( subject_bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- biosequence
ALTER TABLE biosequence ADD CONSTRAINT FKbioentry_bioseq
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- comment
ALTER TABLE comment ADD CONSTRAINT FKbioentry_comment
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- bioentry_dbxref
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKbioentry_dblink
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKdbxref_dblink
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
-- dbxref_qualifier_value
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKtrm_dbxrefqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKdbxref_dbxrefqual
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
-- bioentry_reference
ALTER TABLE bioentry_reference ADD CONSTRAINT FKbioentry_entryref
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_reference ADD CONSTRAINT FKreference_entryref
FOREIGN KEY ( reference_id ) REFERENCES reference ( reference_id )
ON DELETE CASCADE ;
-- reference
ALTER TABLE reference ADD CONSTRAINT FKdbxref_reference
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
-- bioentry_qualifier_value
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKbioentry_entqual
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKterm_entqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
-- seqfeature
ALTER TABLE seqfeature ADD CONSTRAINT FKterm_seqfeature
FOREIGN KEY ( type_term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature ADD CONSTRAINT FKsourceterm_seqfeature
FOREIGN KEY ( source_term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature ADD CONSTRAINT FKbioentry_seqfeature
FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
ON DELETE CASCADE ;
-- seqfeature_relationship
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKterm_seqfeatrel
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKparentfeat_seqfeatrel
FOREIGN KEY ( object_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKchildfeat_seqfeatrel
FOREIGN KEY ( subject_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
-- seqfeature_path
ALTER TABLE seqfeature_path ADD CONSTRAINT FKterm_seqfeatpath
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKparentfeat_seqfeatpath
FOREIGN KEY ( object_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKchildfeat_seqfeatpath
FOREIGN KEY ( subject_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
-- seqfeature_qualifier_value
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKterm_featqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKseqfeature_featqual
FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
-- seqfeature_dbxref
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKseqfeature_feadblink
FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKdbxref_feadblink
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
ON DELETE CASCADE ;
-- location
ALTER TABLE location ADD CONSTRAINT FKseqfeature_location
FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
ON DELETE CASCADE ;
ALTER TABLE location ADD CONSTRAINT FKdbxref_location
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
ALTER TABLE location ADD CONSTRAINT FKterm_featloc
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
-- location_qualifier_value
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKfeatloc_locqual
FOREIGN KEY ( location_id ) REFERENCES location ( location_id )
ON DELETE CASCADE ;
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKterm_locqual
FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
--
-- This is to solve a problem arising from how transactions are implemented
-- in Postgres as opposed to, e.g., Oracle and InnoDB (MySQL). In short, the
-- difference is that in the latter RDBMSs' implementation, if a particular
-- statement within a transaction fails, the preceding (and possibly
-- subsequent) statements are still valid. On commit, all succeeded statements
-- are committed. In Postgres, the failure of a statement invalidates all
-- preceding statements within the same transaction as well as all subsequent,
-- if any.
--
-- This leads to a problem if you program SQL insert and update statements
-- such that presence of the record you attempt to insert is indicated by
-- failure of the statement due to a unique key constraint violation. Even
-- if your code is prepared to handle the failure by e.g. looking up the
-- record, in the case of Postgres this approach cannot work unless you
-- commit every single statement.
--
-- The bioperl-db adaptor code uses the aforementioned approach and is
-- currently dependent on the following support code. If you are not going
-- to use bioperl-db to populate the database, you may comment out all
-- rules, as then they might add another look-up to one already done on the
-- code that you use and hence add unnecessary overhead.
--
CREATE RULE rule_bioentry_i1
AS ON INSERT TO bioentry
WHERE (SELECT oid FROM bioentry WHERE identifier = new.identifier)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_i2
AS ON INSERT TO bioentry
WHERE (
SELECT oid FROM bioentry
WHERE accession = new.accession
AND biodatabase_id = new.biodatabase_id
AND version = new.version
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_biodatabase_i
AS ON INSERT TO biodatabase
WHERE (SELECT oid FROM biodatabase WHERE name = new.name)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_dbxref_i
AS ON INSERT TO bioentry_dbxref
WHERE (
SELECT oid FROM bioentry_dbxref
WHERE bioentry_id = new.bioentry_id
AND dbxref_id = new.dbxref_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_path_i
AS ON INSERT TO bioentry_path
WHERE (
SELECT oid FROM bioentry_relationship
WHERE object_bioentry_id = new.object_bioentry_id
AND subject_bioentry_id= new.subject_bioentry_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_qualifier_value_i
AS ON INSERT TO bioentry_qualifier_value
WHERE (
SELECT oid FROM bioentry_qualifier_value
WHERE bioentry_id = new.bioentry_id
AND term_id = new.term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_reference_i
AS ON INSERT TO bioentry_reference
WHERE (
SELECT oid FROM bioentry_reference
WHERE bioentry_id = new.bioentry_id
AND reference_id = new.reference_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_bioentry_relationship_i
AS ON INSERT TO bioentry_relationship
WHERE (
SELECT oid FROM bioentry_relationship
WHERE object_bioentry_id = new.object_bioentry_id
AND subject_bioentry_id= new.subject_bioentry_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_biosequence_i
AS ON INSERT TO biosequence
WHERE (SELECT oid FROM biosequence WHERE bioentry_id = new.bioentry_id)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_comment_i
AS ON INSERT TO comment
WHERE (
SELECT oid FROM comment
WHERE bioentry_id = new.bioentry_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_dbxref_i
AS ON INSERT TO dbxref
WHERE (
SELECT oid FROM dbxref
WHERE accession = new.accession
AND dbname = new.dbname
AND version = new.version
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_dbxref_qualifier_value_i
AS ON INSERT TO dbxref_qualifier_value
WHERE (
SELECT oid FROM dbxref_qualifier_value
WHERE dbxref_id = new.dbxref_id
AND term_id = new.term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_location_i
AS ON INSERT TO location
WHERE (
SELECT oid FROM location
WHERE seqfeature_id = new.seqfeature_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_location_qualifier_value_i
AS ON INSERT TO location_qualifier_value
WHERE (
SELECT oid FROM location_qualifier_value
WHERE location_id = new.location_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_ontology_i
AS ON INSERT TO ontology
WHERE (SELECT oid FROM ontology WHERE name = new.name)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_reference_i1
AS ON INSERT TO reference
WHERE (SELECT oid FROM reference WHERE crc = new.crc)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_reference_i2
AS ON INSERT TO reference
WHERE (SELECT oid FROM reference WHERE dbxref_id = new.dbxref_id)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_i
AS ON INSERT TO seqfeature
WHERE (
SELECT oid FROM seqfeature
WHERE bioentry_id = new.bioentry_id
AND type_term_id = new.type_term_id
AND source_term_id = new.source_term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_dbxref_i
AS ON INSERT TO seqfeature_dbxref
WHERE (
SELECT oid FROM seqfeature_dbxref
WHERE seqfeature_id = new.seqfeature_id
AND dbxref_id = new.dbxref_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_path_i
AS ON INSERT TO seqfeature_path
WHERE (
SELECT oid FROM seqfeature_path
WHERE object_seqfeature_id = new.object_seqfeature_id
AND subject_seqfeature_id= new.subject_seqfeature_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_qualifier_value_i
AS ON INSERT TO seqfeature_qualifier_value
WHERE (
SELECT oid FROM seqfeature_qualifier_value
WHERE seqfeature_id = new.seqfeature_id
AND term_id = new.term_id
AND rank = new.rank
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_seqfeature_relationship_i
AS ON INSERT TO seqfeature_relationship
WHERE (
SELECT oid FROM seqfeature_relationship
WHERE object_seqfeature_id = new.object_seqfeature_id
AND subject_seqfeature_id= new.subject_seqfeature_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_taxon_i
AS ON INSERT TO taxon
WHERE (SELECT oid FROM taxon WHERE ncbi_taxon_id = new.ncbi_taxon_id)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_taxon_name_i
AS ON INSERT TO taxon_name
WHERE (
SELECT oid FROM taxon_name
WHERE taxon_id = new.taxon_id
AND name = new.name
AND name_class = new.name_class
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_i1
AS ON INSERT TO term
WHERE (SELECT oid FROM term WHERE identifier = new.identifier)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_i2
AS ON INSERT TO term
WHERE (
SELECT oid FROM term
WHERE name = new.name
AND ontology_id = new.ontology_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_dbxref_i
AS ON INSERT TO term_dbxref
WHERE (
SELECT oid FROM term_dbxref
WHERE dbxref_id = new.dbxref_id
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_path_i
AS ON INSERT TO term_path
WHERE (
SELECT oid FROM term_path
WHERE subject_term_id = new.subject_term_id
AND predicate_term_id = new.predicate_term_id
AND object_term_id = new.object_term_id
AND ontology_id = new.ontology_id
AND distance = new.distance
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_relationship_i
AS ON INSERT TO term_relationship
WHERE (
SELECT oid FROM term_relationship
WHERE subject_term_id = new.subject_term_id
AND predicate_term_id = new.predicate_term_id
AND object_term_id = new.object_term_id
AND ontology_id = new.ontology_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
CREATE RULE rule_term_synonym_i
AS ON INSERT TO term_synonym
WHERE (
SELECT oid FROM term_synonym
WHERE synonym = new.synonym
AND term_id = new.term_id
)
IS NOT NULL
DO INSTEAD NOTHING
;
--
-- Functions that may be used as an API by applications, e.g. load scripts etc.
--
-- this is used by load_ncbi_taxonomy.pl to speed up loading into the taxon
-- table by 1 to 2 orders of magnitude
CREATE OR REPLACE FUNCTION unconstrain_taxon ()
RETURNS INTEGER
AS
'
DROP RULE rule_taxon_i ON taxon;
SELECT 1;
'
LANGUAGE SQL
VOLATILE STRICT SECURITY DEFINER
;
-- this function re-establishes what unconstrain_taxon() removed temporarily
CREATE OR REPLACE FUNCTION constrain_taxon ()
RETURNS INTEGER
AS
'
CREATE RULE rule_taxon_i
AS ON INSERT TO taxon
WHERE (SELECT oid FROM taxon WHERE ncbi_taxon_id = new.ncbi_taxon_id)
IS NOT NULL
DO INSTEAD NOTHING
;
SELECT 1;
'
LANGUAGE SQL
VOLATILE STRICT SECURITY DEFINER
;
More information about the Biojava-l
mailing list