[BioRuby-cvs] bioruby/lib/bio/db/embl format_embl.rb, 1.1.2.2, 1.1.2.3 common.rb, 1.12.2.3, 1.12.2.4
Naohisa Goto
ngoto at dev.open-bio.org
Wed Apr 23 18:52:20 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv13059/lib/bio/db/embl
Modified Files:
Tag: BRANCH-biohackathon2008
format_embl.rb common.rb
Log Message:
* lib/bio/reference.rb
* New methods: Bio::Reference#comments, Bio::Reference#doi
* Code of Bio::Reference#embl is moved to lib/bio/db/embl/format_embl.rb
to improve tolerance for various data (e.g. references with no
record numbers or with duplicated record numbers).
* lib/bio/db/embl/common.rb
* Changes to support Bio::Reference#comments.
* lib/bio/db/embl/format_embl.rb
* Bio::Sequence::Format::NucFormatter::Embl#reference_format_embl
(private method) is added based on Bio::Reference#embl.
* Changes to improve tolerance for various data.
Index: format_embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/Attic/format_embl.rb,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -C2 -d -r1.1.2.2 -r1.1.2.3
*** format_embl.rb 27 Mar 2008 13:38:31 -0000 1.1.2.2
--- format_embl.rb 23 Apr 2008 18:52:18 -0000 1.1.2.3
***************
*** 25,28 ****
--- 25,76 ----
end
+ # format reference
+ # ref:: Bio::Reference object
+ # hash:: (optional) a hash for RN (reference number) administration
+ def reference_format_embl(ref, hash = nil)
+ lines = Array.new
+ if ref.embl_gb_record_number or hash then
+ refno = ref.embl_gb_record_number.to_i
+ hash ||= {}
+ if refno <= 0 or hash[refno] then
+ refno = hash.keys.sort[-1].to_i + 1
+ hash[refno] = true
+ end
+ lines << embl_wrap("RN ", "[#{refno}]")
+ end
+ if ref.comments then
+ ref.comments.each do |cmnt|
+ lines << embl_wrap("RC ", cmnt)
+ end
+ end
+ unless ref.sequence_position.to_s.empty? then
+ lines << embl_wrap("RP ", "#{ref.sequence_position}")
+ end
+ unless ref.doi.to_s.empty? then
+ lines << embl_wrap("RX ", "DOI; #{ref.doi}.")
+ end
+ unless ref.pubmed.to_s.empty? then
+ lines << embl_wrap("RX ", "PUBMED; #{ref.pubmed}.")
+ end
+ unless ref.authors.empty?
+ lines << embl_wrap('RA ', ref.authors.join(', ') + ';')
+ end
+ lines << embl_wrap('RT ',
+ (ref.title.to_s.empty? ? '' :
+ "\"#{ref.title}\"") + ';')
+ unless ref.journal.to_s.empty? then
+ volissue = "#{ref.volume.to_s}"
+ volissue = "#{volissue}(#{ref.issue})" unless ref.issue.to_s.empty?
+ rl = "#{ref.journal}"
+ rl += " #{volissue}" unless volissue.empty?
+ rl += ":#{ref.pages}" unless ref.pages.to_s.empty?
+ rl += "(#{ref.year})" unless ref.year.to_s.empty?
+ rl += '.'
+ lines << embl_wrap('RL ', rl)
+ end
+ lines << "XX"
+ return lines.join("\n")
+ end
+
def seq_format_embl(seq)
output_lines = Array.new
***************
*** 43,64 ****
erb_template <<'__END_OF_TEMPLATE__'
ID <%= entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= molecule_type %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
! XX
<%= embl_wrap('AC ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
! XX
DT <%= date_created %>
DT <%= date_modified %>
! XX
<%= embl_wrap('DE ', definition) %>
! XX
<%= embl_wrap('KW ', keywords.join('; ') + '.') %>
! XX
OS <%= species %>
<%= embl_wrap('OC ', classification.join('; ') + '.') %>
XX
! <%= (references || []).collect{|ref| ref.format('embl')}.join("\n") %>
! XX
! FH Key Location/Qualifiers
! FH
! <%= format_features_embl(features || []) %>XX
SQ Sequence <%= seq.length %> BP; <%= seq.composition.collect{|k,v| "#{v} #{k.upcase}"}.join('; ') + '; ' + (seq.gsub(/[ACTGactg]/, '').length.to_s ) + ' other;' %>
<%= seq_format_embl(seq) %>
--- 91,111 ----
erb_template <<'__END_OF_TEMPLATE__'
ID <%= entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= molecule_type %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
! XX
<%= embl_wrap('AC ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
! XX
DT <%= date_created %>
DT <%= date_modified %>
! XX
<%= embl_wrap('DE ', definition) %>
! XX
<%= embl_wrap('KW ', keywords.join('; ') + '.') %>
! XX
OS <%= species %>
<%= embl_wrap('OC ', classification.join('; ') + '.') %>
XX
! <% hash = {}; (references || []).each do |ref| %><%= reference_format_embl(ref, hash) %>
! <% end %>FH Key Location/Qualifiers
! FH
! <%= format_features_embl(features || []) %>XX
SQ Sequence <%= seq.length %> BP; <%= seq.composition.collect{|k,v| "#{v} #{k.upcase}"}.join('; ') + '; ' + (seq.gsub(/[ACTGactg]/, '').length.to_s ) + ' other;' %>
<%= seq_format_embl(seq) %>
Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/common.rb,v
retrieving revision 1.12.2.3
retrieving revision 1.12.2.4
diff -C2 -d -r1.12.2.3 -r1.12.2.4
*** common.rb 23 Apr 2008 18:04:51 -0000 1.12.2.3
--- common.rb 23 Apr 2008 18:52:18 -0000 1.12.2.4
***************
*** 280,284 ****
end
when 'RC'
! hash['comment'] = value
when 'RP'
hash['sequence_position'] = value
--- 280,287 ----
end
when 'RC'
! unless value.to_s.strip.empty?
! hash['comments'] ||= []
! hash['comments'].push value
! end
when 'RP'
hash['sequence_position'] = value
More information about the bioruby-cvs
mailing list