[Biopython-dev] Update Operation for BioSQL.Loader

Andreas Kuntzagk andreas.kuntzagk at mdc-berlin.de
Wed Jun 5 11:29:51 EDT 2002


Hi,

I've written some update functions for the Loader class.
They will update an existing bioentry/sequence with a record that has the same
accession.

Attention: this is not fully tested.

Maybe someone can have a look at the attached diff (generated with cvs diff).

bye, Andreas
-------------- next part --------------
Index: Loader.py
===================================================================
RCS file: /home/repository/biopython/biopython/BioSQL/Loader.py,v
retrieving revision 1.8
diff -r1.8 Loader.py
13a14,16
> class UpdateNonExistingError(Exception):
>     pass
>     
24c27,35
<     
---
> 
>     def update_or_load(self, record):
>         """Update an existing Record or load a new one    
> 	"""
> 	try: 
> 	    update_seqrecord(record)
> 	except UpdateNonExistingError: # Record does not exist
> 	    load_seqrecord(record)
> 	
66c77
<         """
---
> 	"""
104a116,128
>     def _load_bioentry_keyword(self, record):
>         """Add keywords into the database"""
> 	try:
>             id = self.adaptor.fetch_seqid_by_display_id(self.dbid, record.name)
> 	    keywords = record.annotations["keywords"]
> 	    keyword_ont_id =  self._get_ontology_id("keyword")
> 	    sql = r"INSERT INTO bioentry_qualifier_value VALUES" \
>                   r" (%s, %s, %s)" 
> 	    for k in keywords:
>                 self.adaptor.execute_one(sql, (bioentry_id, keyword_ont_id ,k))
>         except KeyError:
> 	    pass
> 	    
233c257,396
<        
---
> 		  
>     def update_seqrecord(self, record):
>         """Update an existing entry in the database
> 	"""
> 	id = self._update_bioentry(record)
> 	self._update_bioentry_date(record, id)
> 	self._update_bioentry_keyword(record, id)
>         self._update_bioentry_description(record,id)
>         for seq_feature_num in range(len(record.features)):
>             seq_feature = record.features[seq_feature_num]
>             self._update_seqfeature(seq_feature, seq_feature_num, id)
> 	
>     def _update_bioentry(self, record):
>         """Lookup and update an existing entry in the Database.
> 	"""
> 	if record.id.find('.') >= 0: # try to get a version from the id
>             accession, version = record.id.split('.')
>         else: # otherwise just use a null version
>             accession = record.id
>             version = 0
>         try:
>             division = record.annotations["data_file_divison"]
>         except KeyError:
>             division = "No"
>         sql = r"SELECT bioentry_id from bioentry where accession = (%s)"
> 
> 	try:
> 	    bioentry_id = self.adaptor.execute_one(sql, (accession))[0]
> 	except AssertionError:
> 	    raise UpdateNonExistingError
> 	sql = r"UPDATE bioentry SET division = %s, entry_version = %s, biodatabase_id = %s WHERE " \
> 	      r"bioentry_id = %s"
> 	
>         self.adaptor.execute(sql, (division, version, self.dbid, bioentry_id))
> 	return bioentry_id
> 	
>     def _update_bioentry_qualifier(self, bioentry_id, ontology_term_id, value):  
>         sql = r"UPDATE bioentry_qualifier_value SET qualifier_value = %s WHERE  bioentry_id = %s AND ontology_term_id = %s" 
> 	self.adaptor.execute(sql, (value, bioentry_id, ontology_term_id))
>         
>     def _update_bioentry_date(self, record, bioentry_id):
>         """Update the effective date of the entry into the database.
>         """
>         # dates are GenBank style, like:
>         # 14-SEP-2000
>         try:
>             date = record.annotations["date"]
>         except KeyError:
>             # just use today's date
>             date = strftime("%d-%b-%Y", gmtime())
>         date_id = self._get_ontology_id("date", "Sequence date")
> 	self._update_bioentry_qualifier(bioentry_id, date_id, date)
> 	
>     def _update_bioentry_keyword(self, record, bioentry_id):
>         """Update keywords into the database"""
> 	try:
> 	    keywords = record.annotations["keywords"]
> 	    keyword_ont_id =  self._get_ontology_id("keyword")
> 	    for k in keywords:
>                 self._update_bioentry_qualifier(bioentry_id, keyword_ont_id, k)
>         except KeyError:
> 	    pass
> 
>     def _update_biosequence(self, record, bioentry_id):
>         """Update the biosequence table in the database.
>         """
>         accession, version = record.id.split(".")
>         # determine the string representation of the alphabet
>         if isinstance(record.seq.alphabet, Alphabet.DNAAlphabet):
>             mol_type = "DNA"
>         elif isinstance(record.seq.alphabet, Alphabet.RNAAlphabet):
>             mol_type = "RNA"
>         elif isinstance(record.seq.alphabet, Alphabet.ProteinAlphabet):
>             mol_type = "PROTEIN"
>         else:
>             mol_type = "UNKNOWN"
>         
>         sql = r"UPDATE biosequence SET seq_version = %s, molecule = %s, biosequence_str = %s WHERE bioentry_id = %s"
>         self.adaptor.execute_one(sql, (version, mol_type, record.seq.data, bioentry_id))
> 
>     def _update_bioentry_description(self, record, bioentry_id):
>         """Update the description table.
>         """
>         descr_id = self._get_ontology_id("description", "Sequence description")
> 	self._update_bioentry_qualifier(bioentry_id, descr_id, record.description)
> 
>     def _update_seqfeature(self, feature, feature_rank, bioentry_id):
>         """Update a biopython SeqFeature into the database.
>         """
>         seqfeature_id = self._update_seqfeature_basic(feature.type, feature_rank,
>                                                     bioentry_id)
>         self._update_seqfeature_location(feature, seqfeature_id)
>         self._update_seqfeature_qualifiers(feature.qualifiers, seqfeature_id)
> 
>     def _update_seqfeature_basic(self, feature_type, feature_rank, bioentry_id):
>         """Start update of seqfeature. 
>         """
>         seqfeature_key_id = self._get_ontology_id(feature_type)
>         
>         sql = r"SELECT seqfeature_id FROM seqfeature WHERE bioentry_id = %s AND seqfeature_key_id = %s"
>         seqfeature_id = self.adaptor.execute_one (sql, (bioentry_id, seqfeature_key_id))[0]
> 
>         # Not updating the feature_rank       
>         
>         return seqfeature_id
> 
>     def _update_seqfeature_location(self, feature, seqfeature_id):
>         """Update a location of a SeqFeature to the seqfeature_location table.
>         """
> 
>         # hack for NOT NULL in strand -- we have None be the same as 0
>         # for strand information
>         if feature.strand is None:
>             strand = 0
>         else:
>             strand = feature.strand
> 
>         # convert biopython locations to the 1-based location system
>         # used in bioSQL
>         # XXX This could also handle fuzzies
>         start = feature.location.nofuzzy_start + 1
>         end = feature.location.nofuzzy_end 
> 
> 	sql = r"UPDATE seqfeature_location SET seq_start =%s, seq_end = %s, seq_strand = %s " \
>               r"WHERE seqfeature_id = %s"
>             
>         self.adaptor.execute(sql, (start, end, strand, seqfeature_id))
> 
>     def _update_seqfeature_qualifiers(self, qualifiers, seqfeature_id):
>         """Updating the (key, value) pair qualifiers relating to a feature.
> 
>         Qualifiers should be a dictionary of the form:
>             {key : [value1, value2]}
>         """
> 	#Deleting all rows with this seqfeature_id
> 	sql = r"DELETE FROM seqfeature_qualifier_value WHERE seqfeature_id = %s"
> 	self.adaptor.execute(sql, (seqfeature_id))
> 	#reload qualifiers
> 	self._load_seqfeature_qualifiers(qualifiers, seqfeature_id)	
>         	


More information about the Biopython-dev mailing list