[BioRuby-cvs] bioruby/lib/bio/db/embl common.rb,1.12.2.1,1.12.2.2
Naohisa Goto
ngoto at dev.open-bio.org
Wed Apr 23 17:34:17 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv12720/lib/bio/db/embl
Modified Files:
Tag: BRANCH-biohackathon2008
common.rb
Log Message:
lib/bio/db/embl/common.rb in branch BRANCH-biohackathon2008 was copied from
CVS HEAD revision 1.13 to pick up the bug fix made in that revision.
(Bug fix: Bio::EMBL#references failed to parse the journal name, volume, issue,
pages, and year. In addition, it could fail to parse the PubMed ID.)
Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/common.rb,v
retrieving revision 1.12.2.1
retrieving revision 1.12.2.2
diff -C2 -d -r1.12.2.1 -r1.12.2.2
*** common.rb 20 Feb 2008 09:56:22 -0000 1.12.2.1
--- common.rb 23 Apr 2008 17:34:15 -0000 1.12.2.2
***************
*** 241,305 ****
def ref
unless @data['R']
! @data['R'] = Array.new
! # Get the different references as 'blurbs' (the lines together)
! reference_blurbs = get('R').split(/\nRN /)
! reference_blurbs.each_index do |i|
! reference_blurbs[i] = 'RN ' + reference_blurbs[i] unless reference_blurbs[i] =~ /^RN /
! end
!
! # For each reference, we'll first create a hash that looks like below.
! # Suppose the input is:
! # RA name1, name2, name3
! # RA name4
! # RT some part of the title that
! # RT did not fit on one line
! # Then the hash looks like:
! # h = {
! # 'RA' => ["name1, name2, name3", "name4"],
! # 'RT' => ["some part of the title that", "did not fit on one line"]
! # }
! reference_blurbs.each do |rb|
! line_based_data = Hash.new
! rb.split(/\n/).each do |line|
! key, value = line.scan(/^(R[A-Z]) "?(\[?.*[A-Za-z0-9]\]?)/)[0]
! if line_based_data[key].nil?
! line_based_data[key] = Array.new
! end
! line_based_data[key].push(value)
! end
!
! # Now we have to sanitize the hash: the authors should be kept in an
! # array, the title should be 1 string, ... So the hash should look like:
! # h = {
! # 'RA' => ["name1", "name2", "name3", "name4"],
! # 'RT' => 'some part of the title that did not fit on one line'
! # }
! line_based_data.keys.each do |key|
! if ['RC', 'RP', 'RT', 'RL'].include?(key)
! line_based_data[key] = line_based_data[key].join(' ')
! elsif ['RA', 'RX'].include?(key)
! sanitized_data = Array.new
! line_based_data[key].each do |v|
! sanitized_data.push(v.split(/\s*,\s*/))
! end
! line_based_data[key] = sanitized_data.flatten
! elsif key == 'RN'
! line_based_data[key] = line_based_data[key][0].sub(/^\[/,'').sub(/\]$/,'').to_i
end
end
!
! # And put it in @data. @data in the end looks like this:
! # data = [
! # {
! # 'RA' => ["name1", "name2", "name3", "name4"],
! # 'RT' => 'some part of the title that did not fit on one line'
! # },
! # {
! # 'RA' => ["name1", "name2", "name3", "name4"],
! # 'RT' => 'some part of the title that did not fit on one line'
! # }
! # ]
! @data['R'].push(line_based_data)
end
end
@data['R']
--- 241,265 ----
def ref
unless @data['R']
! ary = Array.new
! get('R').split(/\nRN /).each do |str|
! raw = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
! 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
! str = 'RN ' + str unless /^RN / =~ str
! str.split("\n").each do |line|
! if /^(R[NPXARLCTG]) (.+)/ =~ line
! raw[$1] += $2 + ' '
! else
! raise "Invalid format in R lines, \n[#{line}]\n"
end
end
! raw.each_value {|v|
! v.strip!
! v.sub!(/^"/,'')
! v.sub!(/;$/,'')
! v.sub!(/"$/,'')
! }
! ary.push(raw)
end
+ @data['R'] = ary
end
@data['R']
***************
*** 310,345 ****
def references
unless @data['references']
! @data['references'] = Array.new
! self.ref.each do |ref|
! hash = Hash.new
! ref.each do |key, value|
case key
- when 'RN'
- hash['embl_gb_record_number'] = value
- when 'RC'
- hash['comments'] = value
- when 'RX'
- hash['xrefs'] = value
- when 'RP'
- hash['sequence_position'] = value
when 'RA'
! hash['authors'] = value
when 'RT'
hash['title'] = value
when 'RL'
! hash['journal'] = value
when 'RX' # PUBMED, MEDLINE
! value.each {|item|
! tag, xref = item.split(/; /).map {|i| i.strip }
hash[ tag.downcase ] = xref
}
end
! end
! @data['references'].push(Reference.new(hash))
! end
end
@data['references']
end
# returns contents in the DR line.
# * Bio::EMBLDB::Common#dr -> [ <Database cross-reference Hash>* ]
--- 270,306 ----
def references
unless @data['references']
! ary = self.ref.map {|ent|
! hash = Hash.new('')
! ent.each {|key, value|
case key
when 'RA'
! hash['authors'] = value.split(/, /)
when 'RT'
hash['title'] = value
when 'RL'
! if /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/ =~ value.to_s
! hash['journal'] = $1.rstrip
! hash['volume'] = $2
! hash['issue'] = $4
! hash['pages'] = $6
! hash['year'] = $7
! else
! hash['journal'] = value
! end
when 'RX' # PUBMED, MEDLINE
! value.split(/\. /).each {|item|
! tag, xref = item.split(/\; /).map {|i| i.strip.sub(/\.\z/, '') }
hash[ tag.downcase ] = xref
}
end
! }
! Reference.new(hash)
! }
! @data['references'] = References.new(ary)
end
@data['references']
end
+
# returns contents in the DR line.
# * Bio::EMBLDB::Common#dr -> [ <Database cross-reference Hash>* ]
More information about the bioruby-cvs
mailing list