[Biopython-dev] Update Operation for BioSQL.Loader
Andreas Kuntzagk
andreas.kuntzagk at mdc-berlin.de
Wed Jun 5 11:29:51 EDT 2002
Hi,
I've written some update functions for the Loader class.
They will update an existing bioentry/sequence with a record with the same
accession.
Attention: this is not fully tested.
Maybe someone can have a look at the attached diff (generated with cvs diff).
bye, Andreas
-------------- next part --------------
Index: Loader.py
===================================================================
RCS file: /home/repository/biopython/biopython/BioSQL/Loader.py,v
retrieving revision 1.8
diff -r1.8 Loader.py
13a14,16
> class UpdateNonExistingError(Exception):
> pass
>
24c27,35
<
---
>
> def update_or_load(self, record):
> """Update an existing Record or load a new one
> """
> try:
> update_seqrecord(record)
> except UpdateNonExistingError: # Record does not exist
> load_seqrecord(record)
>
66c77
< """
---
> """
104a116,128
> def _load_bioentry_keyword(self, record):
> """Add keywords into the database"""
> try:
> id = self.adaptor.fetch_seqid_by_display_id(self.dbid, record.name)
> keywords = record.annotations["keywords"]
> keyword_ont_id = self._get_ontology_id("keyword")
> sql = r"INSERT INTO bioentry_qualifier_value VALUES" \
> r" (%s, %s, %s)"
> for k in keywords:
> self.adaptor.execute_one(sql, (bioentry_id, keyword_ont_id ,k))
> except KeyError:
> pass
>
233c257,396
<
---
>
> def update_seqrecord(self, record):
> """Update an existing entry in the database
> """
> id = self._update_bioentry(record)
> self._update_bioentry_date(record, id)
> self._update_bioentry_keyword(record, id)
> self._update_bioentry_description(record,id)
> for seq_feature_num in range(len(record.features)):
> seq_feature = record.features[seq_feature_num]
> self._update_seqfeature(seq_feature, seq_feature_num, id)
>
> def _update_bioentry(self, record):
> """Lookup and update an existing entry in the Database.
> """
> if record.id.find('.') >= 0: # try to get a version from the id
> accession, version = record.id.split('.')
> else: # otherwise just use a null version
> accession = record.id
> version = 0
> try:
> division = record.annotations["data_file_divison"]
> except KeyError:
> division = "No"
> sql = r"SELECT bioentry_id from bioentry where accession = (%s)"
>
> try:
> bioentry_id = self.adaptor.execute_one(sql, (accession))[0]
> except AssertionError:
> raise UpdateNonExistingError
> sql = r"UPDATE bioentry SET division = %s, entry_version = %s, biodatabase_id = %s WHERE " \
> r"bioentry_id = %s"
>
> self.adaptor.execute(sql, (division, version, self.dbid, bioentry_id))
> return bioentry_id
>
> def _update_bioentry_qualifier(self, bioentry_id, ontology_term_id, value):
> sql = r"UPDATE bioentry_qualifier_value SET qualifier_value = %s WHERE bioentry_id = %s AND ontology_term_id = %s"
> self.adaptor.execute(sql, (value, bioentry_id, ontology_term_id))
>
> def _update_bioentry_date(self, record, bioentry_id):
> """Update the effective date of the entry into the database.
> """
> # dates are GenBank style, like:
> # 14-SEP-2000
> try:
> date = record.annotations["date"]
> except KeyError:
> # just use today's date
> date = strftime("%d-%b-%Y", gmtime())
> date_id = self._get_ontology_id("date", "Sequence date")
> self._update_bioentry_qualifier(bioentry_id, date_id, date)
>
> def _update_bioentry_keyword(self, record, bioentry_id):
> """Update keywords into the database"""
> try:
> keywords = record.annotations["keywords"]
> keyword_ont_id = self._get_ontology_id("keyword")
> for k in keywords:
> self._update_bioentry_qualifier(bioentry_id, keyword_ont_id, k)
> except KeyError:
> pass
>
> def _update_biosequence(self, record, bioentry_id):
> """Update the biosequence table in the database.
> """
> accession, version = record.id.split(".")
> # determine the string representation of the alphabet
> if isinstance(record.seq.alphabet, Alphabet.DNAAlphabet):
> mol_type = "DNA"
> elif isinstance(record.seq.alphabet, Alphabet.RNAAlphabet):
> mol_type = "RNA"
> elif isinstance(record.seq.alphabet, Alphabet.ProteinAlphabet):
> mol_type = "PROTEIN"
> else:
> mol_type = "UNKNOWN"
>
> sql = r"UPDATE biosequence SET seq_version = %s, molecule = %s, biosequence_str = %s WHERE bioentry_id = %s"
> self.adaptor.execute_one(sql, (version, mol_type, record.seq.data, bioentry_id))
>
> def _update_bioentry_description(self, record, bioentry_id):
> """Update the description table.
> """
> descr_id = self._get_ontology_id("description", "Sequence description")
> self._update_bioentry_qualifier(bioentry_id, descr_id, record.description)
>
> def _update_seqfeature(self, feature, feature_rank, bioentry_id):
> """Update a biopython SeqFeature into the database.
> """
> seqfeature_id = self._update_seqfeature_basic(feature.type, feature_rank,
> bioentry_id)
> self._update_seqfeature_location(feature, seqfeature_id)
> self._update_seqfeature_qualifiers(feature.qualifiers, seqfeature_id)
>
> def _update_seqfeature_basic(self, feature_type, feature_rank, bioentry_id):
> """Start update of seqfeature.
> """
> seqfeature_key_id = self._get_ontology_id(feature_type)
>
> sql = r"SELECT seqfeature_id FROM seqfeature WHERE bioentry_id = %s AND seqfeature_key_id = %s"
> seqfeature_id = self.adaptor.execute_one (sql, (bioentry_id, seqfeature_key_id))[0]
>
> # Not updating the feature_rank
>
> return seqfeature_id
>
> def _update_seqfeature_location(self, feature, seqfeature_id):
> """Update a location of a SeqFeature to the seqfeature_location table.
> """
>
> # hack for NOT NULL in strand -- we have None be the same as 0
> # for strand information
> if feature.strand is None:
> strand = 0
> else:
> strand = feature.strand
>
> # convert biopython locations to the 1-based location system
> # used in bioSQL
> # XXX This could also handle fuzzies
> start = feature.location.nofuzzy_start + 1
> end = feature.location.nofuzzy_end
>
> sql = r"UPDATE seqfeature_location SET seq_start =%s, seq_end = %s, seq_strand = %s " \
> r"WHERE seqfeature_id = %s"
>
> self.adaptor.execute(sql, (start, end, strand, seqfeature_id))
>
> def _update_seqfeature_qualifiers(self, qualifiers, seqfeature_id):
> """Updating the (key, value) pair qualifiers relating to a feature.
>
> Qualifiers should be a dictionary of the form:
> {key : [value1, value2]}
> """
> #Deleting all rows with this seqfeature_id
> sql = r"DELETE FROM seqfeature_qualifier_value WHERE seqfeature_id = %s"
> self.adaptor.execute(sql, (seqfeature_id))
> #reload qualifiers
> self._load_seqfeature_qualifiers(qualifiers, seqfeature_id)
>
More information about the Biopython-dev
mailing list