[Biopython] zero-length feature
Anne Pajon
ap12 at sanger.ac.uk
Fri Mar 19 19:19:05 UTC 2010
Dear,
I am having trouble writing out an EMBL file for features of size one.
I've modified InsdcIO.py to fit my needs, because when I try to submit
my file to EMBL, it comes back with this comment: badly formatted --
you need a .. between locations.
def _insdc_location_string_ignoring_strand_and_subfeatures(feature):
    """Build the INSDC location string for a feature (no strand, no sub-features).

    The string is prefixed with ``"<ref>:"`` when ``feature.ref`` is set.
    ``feature.ref_db`` must be empty (checked with an assert).

    This is the locally modified variant: a zero-length location whose end
    is an ExactPosition (start == end == N) is written as ``N+1..N+1``
    instead of a between-position form. NOTE(review): the test output
    reported with this change shows such a location reading back one base
    longer (e.g. [5933:5933] -> [5933:5934]), so it does not round-trip.
    """
    ref = "%s:" % feature.ref if feature.ref else ""
    assert not feature.ref_db
    start = feature.location.start
    end = feature.location.end
    if start == end and isinstance(end, SeqFeature.ExactPosition):
        # Special case: a zero-length slice N:N (the point between two
        # letters) is written here as the single position N+1..N+1.
        one_based = end.position + 1
        return "%s%i..%i" % (ref, one_based, one_based)
    # Typical case, e.g. 12..15 gets mapped to 11:15. The expression is
    # parenthesised so it survives line wrapping without backslashes.
    return (
        ref
        + _insdc_feature_position_string(start, +1)
        + ".."
        + _insdc_feature_position_string(end)
    )
But of course I am getting errors when running the tests:
======================================================================
FAIL: GenBank file to BioSQL and back to a GenBank file, NC_005816.
----------------------------------------------------------------------
Traceback (most recent call last):
File "test_BioSQL.py", line 419, in test_NC_005816
self.loop(os.path.join(os.getcwd(), "GenBank", "NC_005816.gb"),
"gb")
File "test_BioSQL.py", line 481, in loop
self.assert_(compare_record(old, new))
File "seq_tests_common.py", line 261, in compare_record
if not compare_features(old.features, new.features):
File "seq_tests_common.py", line 243, in compare_features
if not compare_feature(old_f, new_f):
File "seq_tests_common.py", line 98, in compare_feature
raise e
AssertionError: [5933:5933] -> [5933:5934]
======================================================================
ERROR: Write and read back AE017046.embl
----------------------------------------------------------------------
Traceback (most recent call last):
File "test_SeqIO_features.py", line 777, in test_AE017046
write_read(os.path.join("EMBL", "AE017046.embl"), "embl", "gb")
File "test_SeqIO_features.py", line 32, in write_read
compare_records(gb_records, gb_records2)
File "test_SeqIO_features.py", line 99, in compare_records
if not compare_record(old,new,expect_minor_diffs):
File "test_SeqIO_features.py", line 50, in compare_record
if not compare_features(old.features, new.features):
File "test_SeqIO_features.py", line 149, in compare_features
if not compare_feature(old,new,ignore_sub_features):
File "test_SeqIO_features.py", line 110, in compare_feature
% (old.location, new.location, str(old), str(new)))
ValueError: [5933:5933] versus [5933:5934]:
type: variation
location: [5933:5933]
ref: None:None
strand: 1
qualifiers:
Key: note, Value: ['compared to AL109969']
Key: replace, Value: ['a']
vs:
type: variation
location: [5933:5934]
ref: None:None
strand: 1
qualifiers:
Key: note, Value: ['compared to AL109969']
Key: replace, Value: ['a']
======================================================================
ERROR: Write and read back NC_005816.gb
----------------------------------------------------------------------
Traceback (most recent call last):
File "test_SeqIO_features.py", line 702, in test_NC_005816
write_read(os.path.join("GenBank", "NC_005816.gb"), "gb", "gb")
File "test_SeqIO_features.py", line 32, in write_read
compare_records(gb_records, gb_records2)
File "test_SeqIO_features.py", line 99, in compare_records
if not compare_record(old,new,expect_minor_diffs):
File "test_SeqIO_features.py", line 50, in compare_record
if not compare_features(old.features, new.features):
File "test_SeqIO_features.py", line 149, in compare_features
if not compare_feature(old,new,ignore_sub_features):
File "test_SeqIO_features.py", line 110, in compare_feature
% (old.location, new.location, str(old), str(new)))
ValueError: [5933:5933] versus [5933:5934]:
type: variation
location: [5933:5933]
ref: None:None
strand: 1
qualifiers:
Key: note, Value: ['compared to AL109969']
Key: replace, Value: ['a']
vs:
type: variation
location: [5933:5934]
ref: None:None
strand: 1
qualifiers:
Key: note, Value: ['compared to AL109969']
Key: replace, Value: ['a']
======================================================================
ERROR: Write and read back SC10H5.embl
----------------------------------------------------------------------
Traceback (most recent call last):
File "test_SeqIO_features.py", line 792, in test_SC10H5
write_read(os.path.join("EMBL", "SC10H5.embl"), "embl", "gb")
File "test_SeqIO_features.py", line 32, in write_read
compare_records(gb_records, gb_records2)
File "test_SeqIO_features.py", line 99, in compare_records
if not compare_record(old,new,expect_minor_diffs):
File "test_SeqIO_features.py", line 50, in compare_record
if not compare_features(old.features, new.features):
File "test_SeqIO_features.py", line 149, in compare_features
if not compare_feature(old,new,ignore_sub_features):
File "test_SeqIO_features.py", line 110, in compare_feature
% (old.location, new.location, str(old), str(new)))
ValueError: [1800:1800] versus [1800:1801]:
type: misc_feature
location: [1800:1800]
ref: None:None
strand: 1
qualifiers:
Key: note, Value: ['Zero-length feature added to test Bioperl
parsing']
vs:
type: misc_feature
location: [1800:1801]
ref: None:None
strand: 1
qualifiers:
Key: note, Value: ['Zero-length feature added to test Bioperl
parsing']
======================================================================
FAIL: Features: write/read simple between locations.
----------------------------------------------------------------------
Traceback (most recent call last):
File "test_SeqIO_features.py", line 373, in test_between
"10^11")
AssertionError: '11..11' != '10^11'
----------------------------------------------------------------------
Ran 144 tests in 226.037 seconds
FAILED (failures = 2)
What could be a better solution? Please let me know.
Kind regards,
Anne.
--
Dr Anne Pajon - Pathogen Genomics, Team 81
Sanger Institute, Wellcome Trust Genome Campus, Hinxton
Cambridge CB10 1SA, United Kingdom
+44 (0)1223 494 798 (office) | +44 (0)7958 511 353 (mobile)
--
The Wellcome Trust Sanger Institute is operated by Genome Research
Limited, a charity registered in England with number 1021457 and a
company registered in England with number 2742969, whose registered
office is 215 Euston Road, London, NW1 2BE.
More information about the Biopython
mailing list