[BioRuby] concatenate two EMBL sequence files
GOTO Naohisa
ngoto at gen-info.osaka-u.ac.jp
Wed Jan 18 06:11:10 EST 2006
On Wed, 18 Jan 2006 08:30:02 -0000
"jan aerts \(RI\)" <jan.aerts at bbsrc.ac.uk> wrote:
> Just for future reference (including myself): the code to shift features in a GenBank sequence. It might be improved on (a lot?), but it works for me...
In your Locations#to_s,
complement(join(10..20,30..40))
will be changed to
join(complement(30..40),complement(10..20))
I think this might be something wrong.
Because Bio::Locations doesn't record some information,
we cannot reproduce a complete location string from
a Bio::Locations object.
I recently wrote routines to convert GenBank <=> EMBL
features. I attached a file in this mail.
I hope it will help you.
It was not incoorporated into BioRuby because
I had no idea about entry output framework
(counterpart of Bio::FlatFile ???).
In my Locations#to_s routine,
"complement(join(10..20,30..40))" will be correct,
but "complement(join(10000..12000,1..100))"
for a circular sequence will still be changed to
"join(complement(1..100),complement(10000..12000))".
This is limitation of Bio::Locations that losts
information.
--
Naohisa GOTO
ngoto at gen-info.osaka-u.ac.jp
Department of Genome Informatics, Genome Information Research Center,
Research Institute for Microbial Diseases, Osaka University, Japan
-------------- next part --------------
#
require 'bio'
module Bio
class Feature
def formatted_string(style = 'GenBank')
style = style.to_s
case style
when /GenBank/i, /NCBI/i, /DDBJ/i
style = :genbank
when /EMBL/i
style = :embl
else
raise ArgumentError, "unknown style"
end
case style
when :genbank
head = ' ' * 5
head2 = head + ' ' * 16
width = 79 - head2.length
first = head + sprintf("%-16s", self.feature)
when :embl
head = 'FT '
head2 = head + ' ' * 16
width = 80 - head2.length
first = head + sprintf("%-16s", self.feature)
end
res = first
h = ''
#position_string = self.position
position_string = self.locations.to_s
str_wrap(position_string, width).each_line do |x|
res << h << x
h = head2
end
self.qualifiers.each do |q|
if q.value.is_a?(Integer) or
q.qualifier == 'transl_table' or
q.value == true then
val = q.value.to_s
else
# http://www.ncbi.nlm.nih.gov/collab/FT/index.html
# 3.3.3.1
val = q.value.gsub(/"/, '""')
#val.delete!("\x00-\x1f\x7f-\xff")
val = '"' + val + '"'
end
if q.value == true then
w = str_wrap('/' + q.qualifier, width)
elsif q.qualifier == 'translation' then
w = str_wrap_force('/' + q.qualifier + '=' + val, width)
else
w = str_wrap('/' + q.qualifier + '=' + val, width)
end
w.each_line do |x|
res << head2 << x
end
end
res
end
def str_wrap_force(str, width)
str.gsub(Regexp.new("(.{1,#{width}})"), "\\1\n")
end
private :str_wrap_force
def str_wrap(str, width)
res = ''
while str and str.length > width do
r = nil
width.downto(1) do |i|
if str[i..i] == ' ' or /[,;]/ =~ str[(i-1)..(i-1)] then
r = str[0..(i-1)].sub(/ +\z/, '')
str = str[i..-1].sub(/\A +/, '')
break
end
end
if r == nil then
r = str[0..(width-1)]
str = str[width..-1]
end
res << r << "\n"
end
res << str << "\n" if str
res
end
private :str_wrap
end #class Feature
class Features
def formatted_string(*arg)
self.features.collect do |x|
x.formatted_string(*arg)
end.join('')
end
end #class Features
class Locations
def to_s
case self.locations.size
when 0
return ''
when 1
return self.locations[0].to_s
end
r = []
clocs = []
self.locations.each do |loc|
raise "Locations#to_s does not support @sequence" if loc.sequence
raise "Locations#to_s does not support @xref_id" if loc.xref_id
if loc.strand < 0
unless clocs.size > 0 and clocs[-1].to > loc.from then
r << format_complement_join_and_clear(clocs) if clocs.size > 0
end
clocs << loc
else
r << format_complement_join_and_clear(clocs) if clocs.size > 0
r << loc.to_s
end
end
r << format_complement_join_and_clear(clocs) if clocs.size > 0
if r.size <= 0 then
''
elsif r.size == 1 then
r[0]
else
"join(#{r.join(',')})"
end
end
def format_complement_join_and_clear(ary)
r = format_complement_join(ary)
ary.clear
r
end
private :format_complement_join_and_clear
def format_complement_join(ary)
return '' if ary.size <= 0
return ary[0].to_s if ary.size == 1
a = ary.collect do |loc|
"#{loc.lt ? '<' : ''}#{loc.from}..#{loc.gt ? '>' : ''}#{loc.to}"
end
a.reverse!
"complement(join(#{a.join(',')}))"
end
private :format_complement_join
end #class Locations
class Location
def to_s
raise "Location#to_s does not support @sequence" if @sequence
raise "Location#to_s does not support @xref_id" if @xref_id
r = "#{@lt ? '<' : ''}#{@from}..#{@gt ? '>' : ''}#{@to}"
@strand < 0 ? "complement(#{r})" : r
end
end #class Location
end #module Bio
More information about the BioRuby
mailing list