[Biopython-dev] Re: BioSQL upgrade [3 of 3]
Yves Bastide
Yves.Bastide at irisa.fr
Fri Nov 22 17:48:53 EST 2002
The big one.
-------------- next part --------------
Index: BioSQL/BioSeq.py
===================================================================
RCS file: /home/repository/biopython/biopython/BioSQL/BioSeq.py,v
retrieving revision 1.8
diff -u -p -r1.8 BioSeq.py
--- BioSQL/BioSeq.py 2002/03/01 12:22:05 1.8
+++ BioSQL/BioSeq.py 2002/11/22 22:13:21
@@ -14,7 +14,7 @@ class DBSeq: # This implements the biop
def __getattr__(self, name):
if name == "data":
return self.tostring()
- raise AttributeError(name)
+ raise AttributeError, name
def __len__(self):
return self._length
@@ -55,18 +55,20 @@ class DBInternalSeq:
self.primary_id = primary_id
self.adaptor = adaptor
- self.name, self.id, _length, self.moltype = \
+ self.name, self.id, _length, self.description, self.moltype = \
self.adaptor.execute_one(
"""select en.display_id, en.accession, length(bs.biosequence_str),
- bs.molecule
- from bioentry en, biosequence bs
- where bs.bioentry_id = en.bioentry_id and
- bs.bioentry_id = %s""",
+ en.description, bs.alphabet
+ from bioentry en, biosequence bs
+ where bs.bioentry_id = en.bioentry_id and
+ bs.bioentry_id = %d""",
(self.primary_id,))
self._length = int(_length)
-
+
def __getattr__(self, name):
+ if name[:1] == '_':
+ raise AttributeError, name
if name == "seq":
moltype = self.moltype.upper()
from Bio.Alphabet import IUPAC
@@ -84,23 +86,9 @@ class DBInternalSeq:
return seq
f = getattr(self, "_get_" + name, None)
if f is None:
- raise AttributeError(name)
+ raise AttributeError, name
return f()
- def _get_description(self):
- descr_results = _get_ontology_terms("description", self.primary_id,
- self.adaptor)
- if len(descr_results) == 0:
- description = ""
- elif len(descr_results) == 1:
- description = descr_results[0]
- else:
- raise ValueError("Got multiple unexpected descriptions: %s" %
- descr_results)
-
- self.description = description
- return description
-
def __len__(self):
return self._length
@@ -110,10 +98,13 @@ def _get_ontology_terms(ontology_name, b
sql = r"SELECT ontology_term_id FROM ontology_term " \
r"WHERE term_name = %s"
id_info = adaptor.execute_and_fetchall(sql, (ontology_name,))
+ if id_info is None:
+ return None
+
ontology_id = id_info[0][0]
sql = r"SELECT qualifier_value FROM bioentry_qualifier_value " \
- r"WHERE bioentry_id = %s AND ontology_term_id = %s"
+ r"WHERE bioentry_id = %d AND ontology_term_id = %s"
values = adaptor.execute_and_fetch_col0(sql, (bioentry_id,
ontology_id))
return values
@@ -190,7 +181,7 @@ class Species:
elif name == "genus":
return self.classification[1]
- raise AttributeError(name)
+ raise AttributeError, name
def __setattr__(self, name, val):
if name == "species":
@@ -212,9 +203,11 @@ class Annotation:
self.primary_id = primary_id
def __getattr__(self, name):
+ if name[:1] == '_':
+ raise AttributeError, name
f = getattr(self, "_get_" + name, None)
if f is None:
- raise AttributeError(name)
+ raise AttributeError, name
return f()
# functions to make this more like a dictionary
@@ -222,7 +215,7 @@ class Annotation:
if key in ["comments", "dblinks", "references"]:
return getattr(self, key)
else:
- raise KeyError("Unexpected item: %s")
+ raise KeyError("Unexpected item: %s" % key)
def has_key(self, key):
if key in ["comments", "dblinks", "references"]:
@@ -236,7 +229,7 @@ class Annotation:
def _get_comments(self):
comments = self.adaptor.execute_and_fetch_col0(
- """select comment_text from comment where bioentry_id = %s""",
+ """select comment_text from comment where bioentry_id = %d""",
(self.primary_id,))
self.comments = comments
return comments
@@ -244,7 +237,7 @@ class Annotation:
def _get_dblinks(self):
dblink_info = self.adaptor.execute_and_fetchall(
"""select dbname, accession from bioentry_direct_links
- where source_bioentry_id = %s""",
+ where source_bioentry_id = %d""",
(self.primary_id,))
dblinks = [DBLink(database, primary_id) for (database, primary_id)
in dblink_info]
@@ -255,7 +248,7 @@ class Annotation:
results = self.adaptor.execute_and_fetchall(
"""select reference_id, reference_start,reference_end
from bioentry_reference
- where bioentry_id = %s
+ where bioentry_id = %d
order by reference_rank""",
(self.primary_id,))
@@ -284,8 +277,8 @@ def load_seq_features(adaptor, primary_i
from Bio import SeqFeature
# Get the seqfeature id list
- sql = r"SELECT seqfeature_id, seqfeature_rank, seqfeature_key_id " \
- r"FROM seqfeature WHERE bioentry_id = %s"
+ sql = r"SELECT seqfeature_id, seqfeature_rank, ontology_term_id " \
+ r"FROM seqfeature WHERE bioentry_id = %d"
results = adaptor.execute_and_fetchall(sql, (primary_id,))
seq_feature_list = []
@@ -324,9 +317,9 @@ def load_seq_features(adaptor, primary_i
# Get any remote reference information
remote_results = adaptor.execute_and_fetchall("""
- SELECT rem.seqfeature_location_id, rem.accession, rem.version
- FROM remote_seqfeature_name rem, seqfeature_location sfl
- WHERE rem.seqfeature_location_id = sfl.seqfeature_location_id AND
+ SELECT seqfeature_location_id, accession, version
+ FROM seqfeature_location sfl, dbxref drf
+ WHERE drf.dbxref_id = sfl.dbxref_id AND
sfl.seqfeature_id = %s""",
(seqfeature_id,))
# Do the merge locally
@@ -387,10 +380,10 @@ class DBSeqRecord:
self.version, _length, self.division = \
self.adaptor.execute_one(
- """select en.entry_version, length(bs.biosequence_str), en.division
+ """select en.entry_version, length(bs.biosequence_str), bs.division
from bioentry en, biosequence bs
where bs.bioentry_id = en.bioentry_id and
- bs.bioentry_id = %s""",
+ bs.bioentry_id = %d""",
(self.primary_id,))
self._length = int(_length)
@@ -399,12 +392,12 @@ class DBSeqRecord:
def __getattr__(self, name):
if name[:1] == "_":
- raise AttributeError(name)
+ raise AttributeError, name
if name in self._forward_getattr:
return getattr(self.primary_seq, name)
f = getattr(self, "_get_" + name, None)
if f is None:
- raise AttributeError(name)
+ raise AttributeError, name
return f()
def _get_primary_seq(self):
@@ -430,14 +423,14 @@ class DBSeqRecord:
def _get_dates(self):
self.dates = _get_ontology_terms("date", self.primary_id, self.adaptor)
- return dates
+ return self.dates
def _get_species(self):
full_lineage, common_name = self.adaptor.execute_one(
"""select tx.full_lineage, tx.common_name
- from taxa tx, bioentry_taxa bt
- where tx.taxa_id = bt.taxa_id and
- bt.bioentry_id = %s""",
+ from taxon tx, bioentry be
+ where tx.taxon_id = be.taxon_id and
+ be.bioentry_id = %d""",
(self.primary_id,))
terms = full_lineage.split(":")
species = Species(terms, common_name)
@@ -448,9 +441,7 @@ class DBSeqRecord:
return version
def _get_keywords(self):
- keywords = self.adaptor.execute_and_fetch_col0(
- """select keywords from bioentry_keywords
- where bioentry_id = %s""",
- (self.primary_id,))
+ keywords = _get_ontology_term('Keywords', self.primary_id,
+ self.adaptor)
self.keywords = keywords
return keywords
Index: BioSQL/BioSeqDatabase.py
===================================================================
RCS file: /home/repository/biopython/biopython/BioSQL/BioSeqDatabase.py,v
retrieving revision 1.11
diff -u -p -r1.11 BioSeqDatabase.py
--- BioSQL/BioSeqDatabase.py 2002/11/20 15:38:11 1.11
+++ BioSQL/BioSeqDatabase.py 2002/11/22 22:13:21
@@ -58,11 +58,12 @@ def open_database(driver = "MySQLdb", *a
return DBServer(conn, module)
class DBServer:
- def __init__(self, conn, module):
+ def __init__(self, conn, module, module_name = None):
self.module = module
- if module.__name__ == 'psycopg':
+ if module_name is None: module_name = module.__name__
+ if module_name == 'psycopg':
create_dbutils = DBUtils.create_Pg_dbutils
- elif module.__name__ == 'MySQLdb':
+ elif module_name == 'MySQLdb':
create_dbutils = DBUtils.create_Mysql_dbutils
else:
create_dbutils = DBUtils.create_Generic_dbutils
@@ -123,7 +124,7 @@ class Adaptor:
def __init__(self, conn, create_dbutils):
self.conn = conn
self.cursor = conn.cursor()
- self.dbutils = create_dbutils()##self.conn, self.cursor)
+ self.dbutils = create_dbutils()
def last_id(self, table):
return self.dbutils.last_id(self.cursor, table)
@@ -131,6 +132,12 @@ class Adaptor:
def autocommit(self, y = 1):
return self.dbutils.autocommit(self.conn, y)
+ def commit(self):
+ return self.conn.commit()
+
+ def rollback(self):
+ return self.conn.rollback()
+
def fetch_dbid_by_dbname(self, dbname):
self.cursor.execute(
r"select biodatabase_id from biodatabase where name = %s",
@@ -138,14 +145,17 @@ class Adaptor:
rv = self.cursor.fetchall()
if not rv:
raise KeyError("Cannot find biodatabase with name %r" % dbname)
- assert len(rv) == 1, "More than one biodatabase with name %r" % dbname
+ # Cannot happen (UK)
+## assert len(rv) == 1, "More than one biodatabase with name %r" % dbname
return rv[0][0]
def fetch_seqid_by_display_id(self, dbid, name):
- self.cursor.execute(
- r"select bioentry_id from bioentry where "
- r" biodatabase_id = %s and display_id = %s",
- (dbid, name))
+ sql = r"select bioentry_id from bioentry where display_id = %s"
+ fields = [name]
+ if dbid:
+ sql += " and biodatabase_id = %d"
+ fields.append(dbid)
+ self.cursor.execute(sql, fields)
rv = self.cursor.fetchall()
if not rv:
raise IndexError("Cannot find display id %r" % name)
@@ -153,13 +163,17 @@ class Adaptor:
return rv[0][0]
def fetch_seqid_by_accession(self, dbid, name):
- self.cursor.execute(
- r"select bioentry_id from bioentry where "
- r" biodatabase_id = %s and accession = %s",
- (dbid, name))
+ sql = r"select bioentry_id from bioentry where accession = %s"
+ fields = [name]
+ if dbid:
+ sql += " and biodatabase_id = %d"
+ fields.append(dbid)
+
+ self.cursor.execute(sql, fields)
rv = self.cursor.fetchall()
if not rv:
raise IndexError("Cannot find accession %r" % name)
+ # Can happen: several versions (or biodatabases)
assert len(rv) == 1, "More than one entry with accession of %r" % name
return rv[0][0]
@@ -203,6 +217,8 @@ class Adaptor:
def execute(self, sql, args):
"""Just execute an sql command.
"""
+## print "sql:", `sql`
+## print "args:", `args`
self.cursor.execute(sql, args)
def get_subseq_as_string(self, seqid, start, end):
Index: BioSQL/Loader.py
===================================================================
RCS file: /home/repository/biopython/biopython/BioSQL/Loader.py,v
retrieving revision 1.9
diff -u -p -r1.9 Loader.py
--- BioSQL/Loader.py 2002/11/20 15:38:11 1.9
+++ BioSQL/Loader.py 2002/11/22 22:13:21
@@ -11,6 +11,8 @@ from time import gmtime, strftime
# biopython
from Bio import Alphabet
+from Bio.crc import crc64
+
class DatabaseLoader:
"""Load a database with biopython objects.
"""
@@ -27,40 +29,135 @@ class DatabaseLoader:
"""
bioentry_id = self._load_bioentry_table(record)
self._load_bioentry_date(record, bioentry_id)
- # self._load_bioentry_taxa(record, bioentry_id)
self._load_biosequence(record, bioentry_id)
- self._load_bioentry_description(record, bioentry_id)
+ self._load_comment(record, bioentry_id)
+ references = record.annotations.get('references', ())
+ for reference, rank in zip(references, range(len(references))):
+ self._load_reference(reference, rank, bioentry_id)
for seq_feature_num in range(len(record.features)):
seq_feature = record.features[seq_feature_num]
self._load_seqfeature(seq_feature, seq_feature_num, bioentry_id)
- def _get_ontology_id(self, term_name, term_description = ""):
+ def _get_ontology_id(self,
+ term_name,
+ term_description = None,
+ term_identifier = None,
+ category_id = 0):
"""Get the id that corresponds to any term in an ontology.
This looks through the ontology table for a the given term. If it
is not found, a new id corresponding to this ontology is created.
In either case, the id corresponding to that term is returned, so
that you can reference it in another table.
+
+ The category_id can be needed to disambiguate the term:
+ it will be used if != 0.
"""
+
# try to get the ontology term
sql = r"SELECT ontology_term_id FROM ontology_term " \
r"WHERE term_name = %s"
- id_results = self.adaptor.execute_and_fetchall(sql, (term_name,))
+ fields = [term_name]
+ if category_id != 0: # 'None' is legitimate
+ sql += ' AND category_id '
+ if category_id is None:
+ sql += 'IS NULL'
+ else:
+ sql += '= %d'
+ fields.append(category_id)
+ id_results = self.adaptor.execute_and_fetchall(sql, fields)
# something is wrong
if len(id_results) > 1:
raise ValueError("Multiple ontology ids for %s: %s" %
- term_name, id_results)
+ (term_name, id_results))
# we already have the ontology term inserted
elif len(id_results) == 1:
return id_results[0][0]
# we need to create it
else:
- sql = r"INSERT INTO ontology_term (term_name, term_definition)" \
- r"VALUES (%s, %s)"
- self.adaptor.execute(sql, (term_name, term_description))
- # recursively call this to give back the id
- return self._get_ontology_id(term_name, term_description)
+ # If no category_id specified, set it to null, as 0 isn't possible
+ if category_id == 0: category_id = None
+
+ sql = r"INSERT INTO ontology_term (term_name, term_definition," \
+ r" term_identifier, category_id)" \
+ r" VALUES (%s, %s, %s, %d)"
+ self.adaptor.execute(sql, (term_name, term_description,
+ term_identifier, category_id))
+ return self.adaptor.last_id('ontology_term')
+ def _get_taxon_id(self, record):
+ """Get the id corresponding to a taxon.
+
+ If the species isn't in the taxon table, it is created.
+
+ The code to find the species in the record is brittle.
+ """
+ # Binomial and full lineage
+ try:
+ binomial = record.annotations["organism"]
+ except KeyError:
+ binomial = None
+
+ # XXX no variant
+ variant = '-'
+
+ if binomial and variant:
+ sql = "SELECT taxon_id FROM taxon WHERE binomial = %s" \
+ " AND variant = %s"
+ taxa = self.adaptor.execute_and_fetchall(sql, (binomial, variant))
+ if taxa:
+ return taxa[0][0]
+
+ # Didn't find the binomial/variant... Let's try with the taxon id
+ ncbi_taxon_id = None
+ for f in record.features:
+ if (f.type == 'source' and getattr(f, 'qualifiers', None)
+ and f.qualifiers.has_key('db_xref')):
+ for db_xref in f.qualifiers['db_xref']:
+ if db_xref[:6] == 'taxon:':
+ ncbi_taxon_id = int(db_xref[6:])
+ break
+ if ncbi_taxon_id: break
+
+ if ncbi_taxon_id:
+ sql = "SELECT taxon_id FROM taxon WHERE ncbi_taxon_id = %u"
+ taxa = self.adaptor.execute_and_fetchall(sql, (ncbi_taxon_id,))
+ if taxa:
+ return taxa[0][0]
+
+ # OK, so we're gonna try to insert the taxon
+
+ # Common name
+ try:
+ common_name = record.annotations["source"]
+ except KeyError:
+ common_name = None
+
+ # Full lineage
+ try:
+ full_lineage = record.annotations["taxonomy"]
+ ante, last = binomial.split()
+ if full_lineage[-1] == ante:
+ full_lineage.append(last)
+ full_lineage.reverse()
+ full_lineage = ':'.join(full_lineage)
+ except KeyError:
+ full_lineage = None
+
+ # Check for the NON NULLs
+ if binomial == None or variant == None or full_lineage == None:
+ return
+
+ # Insert into the taxon table
+ sql = "INSERT INTO taxon (binomial, variant, common_name," \
+ " ncbi_taxon_id, full_lineage)" \
+ " VALUES (%s, %s, %s, %d, %s)"
+ self.adaptor.execute(sql, (binomial, variant, common_name,
+ ncbi_taxon_id, full_lineage))
+ taxon_id = self.adaptor.last_id('taxon')
+
+ return taxon_id
+
def _load_bioentry_table(self, record):
"""Fill the bioentry table with sequence information.
"""
@@ -68,18 +165,21 @@ class DatabaseLoader:
if record.id.find('.') >= 0: # try to get a version from the id
accession, version = record.id.split('.')
- else: # otherwise just use a null version
+ version = int(version)
+ else: # otherwise just use a version of 0
accession = record.id
version = 0
- try:
- division = record.annotations["data_file_divison"]
- except KeyError:
- division = "No"
- sql = r"INSERT INTO bioentry (biodatabase_id, display_id, " \
- r"accession, entry_version, division) VALUES" \
- r" (%s, %s, %s, %s, %s)"
- self.adaptor.execute(sql, (self.dbid, record.name,
- accession, version, division))
+
+ taxon_id = self._get_taxon_id(record)
+ identifier = record.annotations.get('gi')
+ description = getattr(record, 'description', None)
+
+ sql = r"INSERT INTO bioentry (biodatabase_id, taxon_id, display_id, " \
+ r"accession, identifier, description, entry_version) VALUES" \
+ r" (%d, %d, %s, %s, %s, %s, %d)"
+ self.adaptor.execute(sql, (self.dbid, taxon_id, record.name,
+ accession, identifier, description,
+ version))
# now retrieve the id for the bioentry
bioentry_id = self.adaptor.last_id('bioentry')
@@ -100,46 +200,83 @@ class DatabaseLoader:
r" (%s, %s, %s)"
self.adaptor.execute(sql, (bioentry_id, date_id, date))
- def _load_bioentry_taxa(self, record, bioentry_id):
- """Add taxa information to the database.
- """
- return None # XXX don't do anything right now
- try:
- # XXX this isn't right, we need taxa ids and other junk
- taxa = record.annotations["taxa"]
- sql = r"INSERT INTO bioentry_taxa(bioentry_id, taxa_id) VALUES" \
- r" (%s, %s)"
- self.adapter.execute(sql, (bioentry_id, taxa))
- except KeyError:
- pass
-
def _load_biosequence(self, record, bioentry_id):
"""Load the biosequence table in the database.
"""
accession, version = record.id.split(".")
+ version = int(version)
# determine the string representation of the alphabet
if isinstance(record.seq.alphabet, Alphabet.DNAAlphabet):
- alphabet = "DNA"
+ alphabet = "dna"
elif isinstance(record.seq.alphabet, Alphabet.RNAAlphabet):
- alphabet = "RNA"
+ alphabet = "rna"
elif isinstance(record.seq.alphabet, Alphabet.ProteinAlphabet):
- alphabet = "PROTEIN"
+ alphabet = "protein"
else:
- alphabet = "UNKNOWN"
+ alphabet = "unknown"
- sql = r"INSERT INTO biosequence (bioentry_id, seq_version, " \
- r"biosequence_str, molecule) VALUES (%s, %s, %s, %s)"
- self.adaptor.execute(sql, (bioentry_id, version, record.seq.data,
- alphabet))
-
- def _load_bioentry_description(self, record, bioentry_id):
- """Load the description table.
- """
- descr_id = self._get_ontology_id("description", "Sequence description")
- sql = r"INSERT INTO bioentry_qualifier_value VALUES (%s, %s, %s)"
- self.adaptor.execute(sql, (bioentry_id, descr_id,
- record.description))
+ try:
+ division = record.annotations["data_file_division"]
+ except KeyError:
+ division = "UNK"
+ sql = r"INSERT INTO biosequence (bioentry_id, seq_version, " \
+ r"seq_length, biosequence_str, alphabet, division) " \
+ r"VALUES (%d, %d, %d, %s, %s, %s)"
+ self.adaptor.execute(sql, (bioentry_id, version,
+ len(record.seq.data),
+ record.seq.data,
+ alphabet, division))
+
+ def _load_comment(self, record, bioentry_id):
+ # Assume annotations['comment'] is not a list
+ comment = record.annotations.get('comment')
+ if not comment:
+ return
+ comment = comment.replace('\n', ' ')
+
+ sql = "INSERT INTO comment (bioentry_id, comment_text, comment_rank)" \
+ " VALUES (%d, %s, %d)"
+ self.adaptor.execute(sql, (bioentry_id, comment, 1))
+
+ def _load_reference(self, reference, rank, bioentry_id):
+ # Currently, the unique key (UK) is either the medline_id or a CRC64
+ if reference.medline_id:
+ uk = reference.medline_id
+ else:
+ s = ''
+ for f in reference.authors, reference.title, reference.journal:
+ if f: s += f
+ else: s += "<undef>"
+ uk = crc64(s)
+
+ sql = "SELECT reference_id FROM reference WHERE reference_medline = %s"
+ refs = self.adaptor.execute_and_fetch_col0(sql, (uk,))
+ if not len(refs):
+ authors = reference.authors or None
+ title = reference.title or None
+ journal = reference.journal or None
+ sql = "INSERT INTO reference (reference_location," \
+ " reference_title, reference_authors, reference_medline)" \
+ " VALUES (%s, %s, %s, %s)"
+ self.adaptor.execute(sql, (journal, title,
+ authors, uk))
+ reference_id = self.adaptor.last_id('reference')
+ else:
+ reference_id = refs[0]
+ if len(reference.location):
+ start = 1 + int(str(reference.location[0].start))
+ end = int(str(reference.location[0].end))
+ else:
+ start = None
+ end = None
+
+ sql = "INSERT INTO bioentry_reference (bioentry_id, reference_id," \
+ " reference_start, reference_end, reference_rank)" \
+ " VALUES (%d, %d, %d, %d, %d)"
+ self.adaptor.execute(sql, (bioentry_id, reference_id,
+ start, end, rank + 1))
+
def _load_seqfeature(self, feature, feature_rank, bioentry_id):
"""Load a biopython SeqFeature into the database.
"""
@@ -154,13 +291,20 @@ class DatabaseLoader:
This loads the "key" of the seqfeature (ie. CDS, gene) and
the basic seqfeature table itself.
"""
- seqfeature_key_id = self._get_ontology_id(feature_type)
+ category_id = self._get_ontology_id('SeqFeature Keys')
+ seqfeature_key_id = self._get_ontology_id(feature_type,
+ category_id = category_id)
+
+ # XXX source is always EMBL/GenBank/SwissProt here; it should depend on
+ # the record
+ source_cat_id = self._get_ontology_id('SeqFeature Sources')
+ source_id = self._get_ontology_id('EMBL/GenBank/SwissProt',
+ category_id = source_cat_id)
- # XXX This doesn't do source yet, since I'm not sure I understand it.
- sql = r"INSERT INTO seqfeature (bioentry_id, seqfeature_key_id, " \
- r"seqfeature_rank) VALUES (%s, %s, %s)"
+ sql = r"INSERT INTO seqfeature (bioentry_id, ontology_term_id, " \
+ r"seqfeature_source_id, seqfeature_rank) VALUES (%d, %d, %d, %d)"
self.adaptor.execute(sql, (bioentry_id, seqfeature_key_id,
- feature_rank))
+ source_id, feature_rank + 1))
seqfeature_id = self.adaptor.last_id('seqfeature')
return seqfeature_id
@@ -193,7 +337,7 @@ class DatabaseLoader:
"""
sql = r"INSERT INTO seqfeature_location (seqfeature_id, " \
r"seq_start, seq_end, seq_strand, location_rank) " \
- r"VALUES (%s, %s, %s, %s, %s)"
+ r"VALUES (%d, %d, %d, %d, %d)"
# hack for NOT NULL in strand -- we have None be the same as 0
# for strand information
@@ -208,7 +352,7 @@ class DatabaseLoader:
start = feature.location.nofuzzy_start + 1
end = feature.location.nofuzzy_end
- self.adaptor.execute(sql, (seqfeature_id, start, end, strand, rank))
+ self.adaptor.execute(sql, (seqfeature_id, start, end, strand, rank+1))
def _load_seqfeature_qualifiers(self, qualifiers, seqfeature_id):
"""Insert the (key, value) pair qualifiers relating to a feature.
@@ -216,16 +360,18 @@ class DatabaseLoader:
Qualifiers should be a dictionary of the form:
{key : [value1, value2]}
"""
+ tag_category_id = self._get_ontology_id('Annotation Tags')
for qualifier_key in qualifiers.keys():
- qualifier_key_id = self._get_ontology_id(qualifier_key)
+ qualifier_key_id = self._get_ontology_id(qualifier_key,
+ category_id = tag_category_id)
# now add all of the values to their table
for qual_value_rank in range(len(qualifiers[qualifier_key])):
qualifier_value = qualifiers[qualifier_key][qual_value_rank]
sql = r"INSERT INTO seqfeature_qualifier_value VALUES" \
- r" (%s, %s, %s, %s)"
+ r" (%d, %d, %d, %s)"
self.adaptor.execute(sql, (seqfeature_id,
- qualifier_key_id, qual_value_rank, qualifier_value))
+ qualifier_key_id, qual_value_rank + 1, qualifier_value))
class DatabaseRemover:
"""Complement the Loader functionality by fully removing a database.
More information about the Biopython-dev
mailing list