[Biopython] zero-length feature

Anne Pajon ap12 at sanger.ac.uk
Fri Mar 19 19:19:05 UTC 2010


Dear,

I am having trouble writing out an EMBL file for a feature of size one.
I've modified InsdcIO.py to fit my needs, because when I try to submit
my file to EMBL, it comes back with this comment: badly formatted --
you need a .. between locations.

def _insdc_location_string_ignoring_strand_and_subfeatures(feature):
    """Return the INSDC location string for the feature's own location.

    As the name says, strand and sub-features are not considered here.
    If feature.ref is set it is prepended as "ref:"; feature.ref_db is
    asserted to be unset.

    Modified behaviour: a zero-length location whose end is an
    ExactPosition is written as "N..N" (N = end.position + 1) instead
    of the between-position form "N^N+1".
    """
    if feature.ref:
        ref = "%s:" % feature.ref
    else:
        ref = ""
    assert not feature.ref_db
    if feature.location.start == feature.location.end \
    and isinstance(feature.location.end, SeqFeature.ExactPosition):
        #Special case, a zero length slice such as 12:12 (the point
        #between two letters, i.e. 12^13) is written out here as 13..13
        #NOTE: on re-reading, 13..13 is a one-letter feature (12:13),
        #so this output does not round-trip back to 12:12.
        return "%s%i..%i" % (ref, feature.location.end.position+1,
                            feature.location.end.position+1)
    else:
        #Typical case, e.g. 12..15 gets mapped to 11:15
        #Parentheses (not backslashes) hold the expression together, so
        #re-wrapping the lines cannot break the continuation.
        return (ref
                + _insdc_feature_position_string(feature.location.start, +1)
                + ".."
                + _insdc_feature_position_string(feature.location.end))

But of course I am getting errors when running the tests:

======================================================================
FAIL: GenBank file to BioSQL and back to a GenBank file, NC_005816.
----------------------------------------------------------------------
Traceback (most recent call last):
  File "test_BioSQL.py", line 419, in test_NC_005816
    self.loop(os.path.join(os.getcwd(), "GenBank", "NC_005816.gb"), "gb")
  File "test_BioSQL.py", line 481, in loop
    self.assert_(compare_record(old, new))
  File "seq_tests_common.py", line 261, in compare_record
    if not compare_features(old.features, new.features):
  File "seq_tests_common.py", line 243, in compare_features
    if not compare_feature(old_f, new_f):
  File "seq_tests_common.py", line 98, in compare_feature
    raise e
AssertionError: [5933:5933] -> [5933:5934]

======================================================================
ERROR: Write and read back AE017046.embl
----------------------------------------------------------------------
Traceback (most recent call last):
  File "test_SeqIO_features.py", line 777, in test_AE017046
    write_read(os.path.join("EMBL", "AE017046.embl"), "embl", "gb")
  File "test_SeqIO_features.py", line 32, in write_read
    compare_records(gb_records, gb_records2)
  File "test_SeqIO_features.py", line 99, in compare_records
    if not compare_record(old,new,expect_minor_diffs):
  File "test_SeqIO_features.py", line 50, in compare_record
    if not compare_features(old.features, new.features):
  File "test_SeqIO_features.py", line 149, in compare_features
    if not compare_feature(old,new,ignore_sub_features):
  File "test_SeqIO_features.py", line 110, in compare_feature
    % (old.location, new.location, str(old), str(new)))
ValueError: [5933:5933] versus [5933:5934]:
type: variation
location: [5933:5933]
ref: None:None
strand: 1
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['a']

vs:
type: variation
location: [5933:5934]
ref: None:None
strand: 1
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['a']


======================================================================
ERROR: Write and read back NC_005816.gb
----------------------------------------------------------------------
Traceback (most recent call last):
  File "test_SeqIO_features.py", line 702, in test_NC_005816
    write_read(os.path.join("GenBank", "NC_005816.gb"), "gb", "gb")
  File "test_SeqIO_features.py", line 32, in write_read
    compare_records(gb_records, gb_records2)
  File "test_SeqIO_features.py", line 99, in compare_records
    if not compare_record(old,new,expect_minor_diffs):
  File "test_SeqIO_features.py", line 50, in compare_record
    if not compare_features(old.features, new.features):
  File "test_SeqIO_features.py", line 149, in compare_features
    if not compare_feature(old,new,ignore_sub_features):
  File "test_SeqIO_features.py", line 110, in compare_feature
    % (old.location, new.location, str(old), str(new)))
ValueError: [5933:5933] versus [5933:5934]:
type: variation
location: [5933:5933]
ref: None:None
strand: 1
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['a']

vs:
type: variation
location: [5933:5934]
ref: None:None
strand: 1
qualifiers:
    Key: note, Value: ['compared to AL109969']
    Key: replace, Value: ['a']


======================================================================
ERROR: Write and read back SC10H5.embl
----------------------------------------------------------------------
Traceback (most recent call last):
  File "test_SeqIO_features.py", line 792, in test_SC10H5
    write_read(os.path.join("EMBL", "SC10H5.embl"), "embl", "gb")
  File "test_SeqIO_features.py", line 32, in write_read
    compare_records(gb_records, gb_records2)
  File "test_SeqIO_features.py", line 99, in compare_records
    if not compare_record(old,new,expect_minor_diffs):
  File "test_SeqIO_features.py", line 50, in compare_record
    if not compare_features(old.features, new.features):
  File "test_SeqIO_features.py", line 149, in compare_features
    if not compare_feature(old,new,ignore_sub_features):
  File "test_SeqIO_features.py", line 110, in compare_feature
    % (old.location, new.location, str(old), str(new)))
ValueError: [1800:1800] versus [1800:1801]:
type: misc_feature
location: [1800:1800]
ref: None:None
strand: 1
qualifiers:
    Key: note, Value: ['Zero-length feature added to test Bioperl parsing']

vs:
type: misc_feature
location: [1800:1801]
ref: None:None
strand: 1
qualifiers:
    Key: note, Value: ['Zero-length feature added to test Bioperl parsing']


======================================================================
FAIL: Features: write/read simple between locations.
----------------------------------------------------------------------
Traceback (most recent call last):
  File "test_SeqIO_features.py", line 373, in test_between
    "10^11")
AssertionError: '11..11' != '10^11'

----------------------------------------------------------------------
Ran 144 tests in 226.037 seconds

FAILED (failures = 2)

What would be a better solution? Please let me know.
Kind regards,
Anne.
--
Dr Anne Pajon - Pathogen Genomics, Team 81
Sanger Institute, Wellcome Trust Genome Campus, Hinxton
Cambridge CB10 1SA, United Kingdom
+44 (0)1223 494 798 (office) | +44 (0)7958 511 353 (mobile)



-- 
 The Wellcome Trust Sanger Institute is operated by Genome Research 
 Limited, a charity registered in England with number 1021457 and a 
 company registered in England with number 2742969, whose registered 
 office is 215 Euston Road, London, NW1 2BE. 



More information about the Biopython mailing list