From k at dev.open-bio.org Tue Dec 12 18:57:44 2006 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Tue, 12 Dec 2006 23:57:44 +0000 Subject: [BioRuby-cvs] bioruby/doc Design.rd.ja,1.7,NONE Message-ID: <200612122357.kBCNviTu005980@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/doc In directory dev.open-bio.org:/tmp/cvs-serv5976 Removed Files: Design.rd.ja Log Message: * obsoleted --- Design.rd.ja DELETED --- From k at dev.open-bio.org Tue Dec 12 18:58:06 2006 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Tue, 12 Dec 2006 23:58:06 +0000 Subject: [BioRuby-cvs] bioruby/doc TODO.rd.ja,1.16,NONE Message-ID: <200612122358.kBCNw67S006004@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/doc In directory dev.open-bio.org:/tmp/cvs-serv6000 Removed Files: TODO.rd.ja Log Message: * obsoleted --- TODO.rd.ja DELETED --- From k at dev.open-bio.org Tue Dec 12 19:00:17 2006 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Wed, 13 Dec 2006 00:00:17 +0000 Subject: [BioRuby-cvs] bioruby/doc BioRuby.rd.ja,1.10,NONE Message-ID: <200612130000.kBD00HAF006046@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/doc In directory dev.open-bio.org:/tmp/cvs-serv6042 Removed Files: BioRuby.rd.ja Log Message: * obsoleted --- BioRuby.rd.ja DELETED --- From ngoto at dev.open-bio.org Wed Dec 13 10:46:30 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 15:46:30 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db newick.rb,1.1,1.2 Message-ID: <200612131546.kBDFkUYH008108@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory dev.open-bio.org:/tmp/cvs-serv8064/db Modified Files: newick.rb Log Message: NHX (New Hampshire eXtended) input is supported by Bio::Newick class. Bio::PhylogeneticTree supports NHX output (as a string) by #output(:NHX). When outputting a tree, indentation can be specified by options. Many attributes are added to support Bio::PhylogeneticTree::Node and Bio::PhylogeneticTree::Edge. 
Node order in original Newick data is stored to Bio::PhylogeneticTree::Node#order_number. Index: newick.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/newick.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** newick.rb 5 Oct 2006 13:38:22 -0000 1.1 --- newick.rb 13 Dec 2006 15:46:28 -0000 1.2 *************** *** 17,22 **** #+++ def __get_option(key, options) ! options[key] or (@options ? @options[key] : nil) end private :__get_option --- 17,31 ---- #+++ + DEFAULT_OPTIONS = + { :indent => ' ' } + def __get_option(key, options) ! if (r = options[key]) != nil then ! r ! elsif @options && (r = @options[key]) != nil then ! r ! else ! DEFAULT_OPTIONS[key] ! end end private :__get_option *************** *** 49,82 **** private :__to_newick_format_leaf # ! def __to_newick(parents, source, depth, options) result = [] ! indent0 = ' ' * depth ! indent = ' ' * (depth + 1) ! self.each_out_edge(source) do |src, tgt, edge| if parents.include?(tgt) then ;; elsif self.out_degree(tgt) == 1 then ! result << indent + __to_newick_format_leaf(tgt, edge, options) else result << ! __to_newick([ src ].concat(parents), tgt, depth + 1, options) + ! __to_newick_format_leaf(tgt, edge, options) end end ! indent0 + "(\n" + result.join(",\n") + ! (result.size > 0 ? "\n" : '') + indent0 + ')' end private :__to_newick # Returns a newick formatted string. ! def newick(options = {}) root = @root root ||= self.nodes.first return '();' unless root ! 
__to_newick([], root, 0, options) + __to_newick_format_leaf(root, Edge.new, options) + ";\n" end end #class PhylogeneticTree --- 58,212 ---- private :__to_newick_format_leaf + # formats leaf for NHX + def __to_newick_format_leaf_NHX(node, edge, options) + + label = get_node_name(node).to_s + + dist = get_edge_distance_string(edge) + + bs = get_node_bootstrap_string(node) + + if __get_option(:branch_length_style, options) == :disabled + dist = nil + end + + nhx = {} + + # bootstrap + nhx[:B] = bs if bs and !(bs.empty?) + # EC number + nhx[:E] = node.ec_number if node.instance_eval { + defined?(@ec_number) && self.ec_number + } + # scientific name + nhx[:S] = node.scientific_name if node.instance_eval { + defined?(@scientific_name) && self.scientific_name + } + # taxonomy id + nhx[:T] = node.taxonomy_id if node.instance_eval { + defined?(@taxonomy_id) && self.taxonomy_id + } + + # :D (gene duplication or speciation) + if node.instance_eval { defined?(@events) && !(self.events.empty?) } then + if node.events.include?(:gene_duplication) + nhx[:D] = 'Y' + elsif node.events.include?(:speciation) + nhx[:D] = 'N' + end + end + + # log likelihood + nhx[:L] = edge.log_likelihood if edge.instance_eval { + defined?(@log_likelihood) && self.log_likelihood } + # width + nhx[:W] = edge.width if edge.instance_eval { + defined?(@width) && self.width } + + # merges other parameters + flag = node.instance_eval { defined? @nhx_parameters } + nhx.merge!(node.nhx_parameters) if flag + flag = edge.instance_eval { defined? @nhx_parameters } + nhx.merge!(edge.nhx_parameters) if flag + + nhx_string = nhx.keys.sort{ |a,b| a.to_s <=> b.to_s }.collect do |key| + "#{key.to_s}=#{nhx[key].to_s}" + end.join(':') + nhx_string = "[&&NHX:" + nhx_string + "]" unless nhx_string.empty? + + label + (dist ? ":#{dist}" : '') + nhx_string + end + private :__to_newick_format_leaf_NHX + # ! def __to_newick(parents, source, depth, format_leaf, ! options, &block) result = [] ! 
if indent_string = __get_option(:indent, options) then ! indent0 = indent_string * depth ! indent = indent_string * (depth + 1) ! newline = "\n" ! else ! indent0 = indent = newline = '' ! end ! out_edges = self.out_edges(source) ! if block_given? then ! out_edges.sort! { |edge1, edge2| yield(edge1[1], edge2[1]) } ! else ! out_edges.sort! do |edge1, edge2| ! o1 = edge1[1].order_number ! o2 = edge2[1].order_number ! if o1 and o2 then ! o1 <=> o2 ! else ! edge1[1].name.to_s <=> edge2[1].name.to_s ! end ! end ! end ! out_edges.each do |src, tgt, edge| if parents.include?(tgt) then ;; elsif self.out_degree(tgt) == 1 then ! result << indent + __send__(format_leaf, tgt, edge, options) else result << ! __to_newick([ src ].concat(parents), tgt, depth + 1, ! format_leaf, options) + ! __send__(format_leaf, tgt, edge, options) end end ! indent0 + "(" + newline + result.join(',' + newline) + ! (result.size > 0 ? newline : '') + indent0 + ')' end private :__to_newick # Returns a newick formatted string. ! # If block is given, the order of the node is sorted ! # (as the same manner as Enumerable#sort). ! # Description about options. ! # :indent : indent string; set false to disable (default: ' ') ! # :bootstrap_style : :disabled disables bootstrap representations ! # :traditional traditional style ! # :molphy Molphy style (default) ! def output_newick(options = {}, &block) #:yields: node1, node2 root = @root root ||= self.nodes.first return '();' unless root ! __to_newick([], root, 0, :__to_newick_format_leaf, options, &block) + __to_newick_format_leaf(root, Edge.new, options) + ";\n" end + + alias newick output_newick + + + # Returns a NHX (New Hampshire eXtended) formatted string. + # If block is given, the order of the node is sorted + # (as the same manner as Enumerable#sort). + # Description about options. 
+ # :indent : indent string; set false to disable (default: ' ') + def output_nhx(options = {}, &block) #:yields: node1, node2 + root = @root + root ||= self.nodes.first + return '();' unless root + __to_newick([], root, 0, + :__to_newick_format_leaf_NHX, options, &block) + + __to_newick_format_leaf_NHX(root, Edge.new, options) + + ";\n" + end + + # Returns formatted text (or something) of the tree + # Currently supported format is: :newick, :NHX + def output(format, *arg, &block) + case format + when :newick + output_newick(*arg, &block) + when :NHX + output_nhx(*arg, &block) + else + raise 'Unknown format' + end + end + end #class PhylogeneticTree *************** *** 105,114 **** # _options_ for parsing can be set. # ! # Note: molphy-style bootstrap values are always parsed, even if # the options[:bootstrap_style] is set to :traditional or :disabled. # Note: By default, if all of the internal node's names are numeric ! # and there are no molphy-style boostrap values, ! # the names are regarded as bootstrap values. ! # options[:bootstrap_style] = :disabled or :molphy to disable the feature. def initialize(str, options = nil) str = str.sub(/\;(.*)/m, ';') --- 235,245 ---- # _options_ for parsing can be set. # ! # Note: molphy-style bootstrap values may be parsed, even if # the options[:bootstrap_style] is set to :traditional or :disabled. # Note: By default, if all of the internal node's names are numeric ! # and there are no NHX and no molphy-style boostrap values, ! # the names of internal nodes are regarded as bootstrap values. ! # options[:bootstrap_style] = :disabled or :molphy to disable the feature ! # (or at least one NHX tag exists). def initialize(str, options = nil) str = str.sub(/\;(.*)/m, ';') *************** *** 155,167 **** # Parses newick formatted leaf (or internal node) name. ! def __parse_newick_leaf(str, node, edge) case str when /(.*)\:(.*)\[(.*)\]/ node.name = $1 edge.distance_string = $2 if $2 and !($2.strip.empty?) ! 
node.bootstrap_string = $3 if $3 and !($3.strip.empty?) when /(.*)\[(.*)\]/ node.name = $1 ! node.bootstrap_string = $2 if $2 and !($2.strip.empty?) when /(.*)\:(.*)/ node.name = $1 --- 286,300 ---- # Parses newick formatted leaf (or internal node) name. ! def __parse_newick_leaf(str, node, edge, options) case str when /(.*)\:(.*)\[(.*)\]/ node.name = $1 edge.distance_string = $2 if $2 and !($2.strip.empty?) ! # bracketted string into bstr ! bstr = $3 when /(.*)\[(.*)\]/ node.name = $1 ! # bracketted string into bstr ! bstr = $2 when /(.*)\:(.*)/ node.name = $1 *************** *** 170,173 **** --- 303,369 ---- node.name = str end + + # determines NHX or Molphy-style bootstrap + if bstr and !(bstr.strip.empty?) + case __get_option(:original_format, options) + when :nhx + # regarded as NHX string which might be broken + __parse_nhx(bstr, node, edge) + when :traditional + # simply ignored + else + case bstr + when /\A\&\&NHX/ + # NHX string + # force to set NHX mode + @options[:original_format] = :nhx + __parse_nhx(bstr, node, edge) + else + # Molphy-style boostrap values + # let molphy mode if nothing determined + @options[:original_format] ||= :molphy + node.bootstrap_string = bstr + end #case bstr + end + end + + # returns true + true + end + + # Parses NHX (New Hampshire eXtended) string + def __parse_nhx(bstr, node, edge) + a = bstr.split(/\:/) + a.shift if a[0] == '&&NHX' + a.each do |str| + tag, val = str.split(/\=/, 2) + case tag + when 'B' + node.bootstrap_string = val + when 'D' + case val + when 'Y' + node.events.push :gene_duplication + when 'N' + node.events.push :speciation + end + when 'E' + node.ec_number = val + when 'L' + edge.log_likelihood = val.to_f + when 'S' + node.scientific_name = val + when 'T' + node.taxonomy_id = val + when 'W' + edge.width = val.to_i + when 'XB' + edge.nhx_parameters[:XB] = val + when 'O', 'SO' + node.nhx_parameters[tag.to_sym] = val.to_i + else # :Co, :SN, :Sw, :XN, and others + node.nhx_parameters[tag.to_sym] = val + end 
+ end #each true end *************** *** 215,219 **** next_token = ary[0] if next_token and next_token != ',' and next_token != ')' then ! __parse_newick_leaf(next_token, cur_node, edge) ary.shift end --- 411,415 ---- next_token = ary[0] if next_token and next_token != ',' and next_token != ')' then ! __parse_newick_leaf(next_token, cur_node, edge, options) ary.shift end *************** *** 226,230 **** leaf = Node.new edge = Edge.new ! __parse_newick_leaf(token, leaf, edge) nodes << leaf edges << Bio::Relation.new(cur_node, leaf, edge) --- 422,426 ---- leaf = Node.new edge = Edge.new ! __parse_newick_leaf(token, leaf, edge, options) nodes << leaf edges << Bio::Relation.new(cur_node, leaf, edge) *************** *** 234,250 **** raise ParseError, 'unmatched parentheses' unless node_stack.empty? bsopt = __get_option(:bootstrap_style, options) ! unless bsopt == :disabled or bsopt == :molphy then ! # If all of the internal node's names are numeric ! # and there are no molphy-style boostrap values, # the names are regarded as bootstrap values. flag = false internal_nodes.each do |node| - if node.bootstrap - unless __get_option(:bootstrap_style, options) == :traditional - @options[:bootstrap_style] = :molphy - end - flag = false - break - end if node.name and !node.name.to_s.strip.empty? then if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name --- 430,440 ---- raise ParseError, 'unmatched parentheses' unless node_stack.empty? bsopt = __get_option(:bootstrap_style, options) ! ofmt = __get_option(:original_format, options) ! unless bsopt == :disabled or bsopt == :molphy or ! ofmt == :nhx or ofmt == :molphy then ! # If all of the internal node's names are numeric, # the names are regarded as bootstrap values. flag = false internal_nodes.each do |node| if node.name and !node.name.to_s.strip.empty? 
then if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name *************** *** 258,261 **** --- 448,452 ---- if flag then @options[:bootstrap_style] = :traditional + @options[:original_format] = :traditional internal_nodes.each do |node| if node.name then *************** *** 266,274 **** end end # If the root implicitly prepared by the program is a leaf and # there are no additional information for the edge from the root to # the first internal node, the root is removed. if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and ! rel.relation.instance_eval { !defined?(@distance) } and edges.find_all { |x| x.node.include?(root) }.size == 1 nodes.shift --- 457,471 ---- end end + # Sets nodes order numbers + nodes.each_with_index do |node, i| + node.order_number = i + end # If the root implicitly prepared by the program is a leaf and # there are no additional information for the edge from the root to # the first internal node, the root is removed. if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and ! rel.relation.instance_eval { ! !defined?(@distance) and !defined?(@log_likelihood) and ! !defined?(@width) and !defined?(@nhx_parameters) } and edges.find_all { |x| x.node.include?(root) }.size == 1 nodes.shift From ngoto at dev.open-bio.org Wed Dec 13 10:46:30 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 15:46:30 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio phylogenetictree.rb,1.1,1.2 Message-ID: <200612131546.kBDFkUUO008111@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv8064 Modified Files: phylogenetictree.rb Log Message: NHX (New Hampshire eXtended) input is supported by Bio::Newick class. Bio::PhylogeneticTree supports NHX output (as a string) by #output(:NHX). When outputting a tree, indentation can be specified by options. Many attributes are added to support Bio::PhylogeneticTree::Node and Bio::PhylogeneticTree::Edge. 
Node order in original Newick data is stored to Bio::PhylogeneticTree::Node#order_number. Index: phylogenetictree.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/phylogenetictree.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** phylogenetictree.rb 5 Oct 2006 13:38:21 -0000 1.1 --- phylogenetictree.rb 13 Dec 2006 15:46:28 -0000 1.2 *************** *** 74,77 **** --- 74,101 ---- @distance_string.to_s end + + #--- + # methods for NHX (New Hampshire eXtended) and/or PhyloXML + #+++ + + # log likelihood value (:L in NHX) + attr_accessor :log_likelihood + + # width of the edge + # (<branch width="w"> of PhyloXML, or :W="w" in NHX) + attr_accessor :width + + # Other NHX parameters. Returns a Hash. + # Note that :L and :W + # are not stored here but stored in the proper attributes in this class. + # However, if you force to set these parameters in this hash, + # the parameters in this hash are preferred when generating NHX. + # In addition, If the same parameters are defined at Node object, + # the parameters in the node are preferred. + def nhx_parameters + @nhx_parameters ||= {} + @nhx_parameters + end + end #class Edge *************** *** 165,168 **** --- 189,229 ---- @name.to_s end + + # the order of the node + # (lower value, high priority) + attr_accessor :order_number + + #--- + # methods for NHX (New Hampshire eXtended) and/or PhyloXML + #+++ + + # Phylogenetic events. + # Returns an Array of one (or more?) of the following symbols + # :gene_duplication + # :speciation + def events + @events ||= [] + @events + end + + # EC number (EC_number in PhyloXML, or :E in NHX) + attr_accessor :ec_number + + # scientific name (scientific_name in PhyloXML, or :S in NHX) + attr_accessor :scientific_name + + # taxonomy identifier (taxonomy_identifier in PhyloXML, or :T in NHX) + attr_accessor :taxonomy_id + + # Other NHX parameters. Returns a Hash. 
+ # Note that :D, :E, :S, and :T + # are not stored here but stored in the proper attributes in this class. + # However, if you force to set these parameters in this hash, + # the parameters in this hash are preferred when generating NHX. + def nhx_parameters + @nhx_parameters ||= {} + @nhx_parameters + end + end #class Node From ngoto at dev.open-bio.org Wed Dec 13 10:55:31 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 15:55:31 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db newick.rb,1.2,1.3 Message-ID: <200612131555.kBDFtVp3008178@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory dev.open-bio.org:/tmp/cvs-serv8158/lib/bio/db Modified Files: newick.rb Log Message: added "require bio/phylogenetictree" Index: newick.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/newick.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** newick.rb 13 Dec 2006 15:46:28 -0000 1.2 --- newick.rb 13 Dec 2006 15:55:29 -0000 1.3 *************** *** 10,13 **** --- 10,15 ---- # + require 'bio/phylogenetictree' + module Bio class PhylogeneticTree From ngoto at dev.open-bio.org Wed Dec 13 11:01:37 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:01:37 +0000 Subject: [BioRuby-cvs] bioruby/lib bio.rb,1.70,1.71 Message-ID: <200612131601.kBDG1bBL008208@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib In directory dev.open-bio.org:/tmp/cvs-serv8186/lib Modified Files: bio.rb Log Message: added autoload of Bio::PhylogeneticTree and Bio::Newick. 
Index: bio.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio.rb,v retrieving revision 1.70 retrieving revision 1.71 diff -C2 -d -r1.70 -r1.71 *** bio.rb 19 Sep 2006 05:41:45 -0000 1.70 --- bio.rb 13 Dec 2006 16:01:35 -0000 1.71 *************** *** 43,46 **** --- 43,48 ---- autoload :Alignment, 'bio/alignment' + ## PhylogeneticTree + autoload :PhylogeneticTree, 'bio/phylogenetictree' ## Map *************** *** 115,118 **** --- 117,121 ---- autoload :NBRF, 'bio/db/nbrf' + autoload :Newick, 'bio/db/newick' ### IO interface modules From ngoto at dev.open-bio.org Wed Dec 13 11:29:39 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:29:39 +0000 Subject: [BioRuby-cvs] bioruby/lib bio.rb,1.71,1.72 Message-ID: <200612131629.kBDGTduQ008839@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib In directory dev.open-bio.org:/tmp/cvs-serv8729/lib Modified Files: bio.rb Log Message: Bio::PhylogeneticTree is renamed to Bio::Tree and filenames are also renamed from phylogenetictree.rb to tree.rb and from test_phylogenetictree.rb to test_tree.rb. Index: bio.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio.rb,v retrieving revision 1.71 retrieving revision 1.72 diff -C2 -d -r1.71 -r1.72 *** bio.rb 13 Dec 2006 16:01:35 -0000 1.71 --- bio.rb 13 Dec 2006 16:29:36 -0000 1.72 *************** *** 43,48 **** autoload :Alignment, 'bio/alignment' ! ## PhylogeneticTree ! autoload :PhylogeneticTree, 'bio/phylogenetictree' ## Map --- 43,48 ---- autoload :Alignment, 'bio/alignment' ! ## Tree ! 
autoload :Tree, 'bio/tree' ## Map From ngoto at dev.open-bio.org Wed Dec 13 11:29:39 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:29:39 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db newick.rb,1.3,1.4 Message-ID: <200612131629.kBDGTdFZ008849@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory dev.open-bio.org:/tmp/cvs-serv8729/lib/bio/db Modified Files: newick.rb Log Message: Bio::PhylogeneticTree is renamed to Bio::Tree and filenames are also renamed from phylogenetictree.rb to tree.rb and from test_phylogenetictree.rb to test_tree.rb. Index: newick.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/newick.rb,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** newick.rb 13 Dec 2006 15:55:29 -0000 1.3 --- newick.rb 13 Dec 2006 16:29:37 -0000 1.4 *************** *** 10,17 **** # ! require 'bio/phylogenetictree' module Bio ! class PhylogeneticTree #--- --- 10,17 ---- # ! require 'bio/tree' module Bio ! class Tree #--- *************** *** 211,215 **** end ! end #class PhylogeneticTree #--- --- 211,215 ---- end ! end #class Tree #--- *************** *** 228,236 **** class ParseError < RuntimeError; end ! # same as Bio::PhylogeneticTree::Edge ! Edge = Bio::PhylogeneticTree::Edge ! # same as Bio::PhylogeneticTree::Node ! Node = Bio::PhylogeneticTree::Node # Creates a new Newick object. --- 228,236 ---- class ParseError < RuntimeError; end ! # same as Bio::Tree::Edge ! Edge = Bio::Tree::Edge ! # same as Bio::Tree::Node ! Node = Bio::Tree::Node # Creates a new Newick object. *************** *** 262,266 **** # Gets the tree. ! # Returns a Bio::PhylogeneticTree object. def tree if !defined?(@tree) --- 262,266 ---- # Gets the tree. ! # Returns a Bio::Tree object. def tree if !defined?(@tree) *************** *** 475,479 **** end # Let the tree into instance variables ! 
tree = Bio::PhylogeneticTree.new tree.instance_eval { @pathway.relations.concat(edges) --- 475,479 ---- end # Let the tree into instance variables ! tree = Bio::Tree.new tree.instance_eval { @pathway.relations.concat(edges) From ngoto at dev.open-bio.org Wed Dec 13 11:29:39 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:29:39 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db test_newick.rb,1.1,1.2 Message-ID: <200612131629.kBDGTduG008859@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db In directory dev.open-bio.org:/tmp/cvs-serv8729/test/unit/bio/db Modified Files: test_newick.rb Log Message: Bio::PhylogeneticTree is renamed to Bio::Tree and filenames are also renamed from phylogenetictree.rb to tree.rb and from test_phylogenetictree.rb to test_tree.rb. Index: test_newick.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/db/test_newick.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_newick.rb 5 Oct 2006 13:38:22 -0000 1.1 --- test_newick.rb 13 Dec 2006 16:29:37 -0000 1.2 *************** *** 17,21 **** require 'bio' ! require 'bio/phylogenetictree' require 'bio/db/newick' --- 17,21 ---- require 'bio' ! require 'bio/tree' require 'bio/db/newick' From ngoto at dev.open-bio.org Wed Dec 13 11:29:39 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:29:39 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio tree.rb,1.2,1.3 Message-ID: <200612131629.kBDGTd34008842@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv8729/lib/bio Modified Files: tree.rb Log Message: Bio::PhylogeneticTree is renamed to Bio::Tree and filenames are also renamed from phylogenetictree.rb to tree.rb and from test_phylogenetictree.rb to test_tree.rb. 
Index: tree.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/tree.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** tree.rb 13 Dec 2006 15:46:28 -0000 1.2 --- tree.rb 13 Dec 2006 16:29:37 -0000 1.3 *************** *** 1,4 **** # ! # = bio/phylogenetictree.rb - phylogenetic tree data structure class # # Copyright:: Copyright (C) 2006 --- 1,4 ---- # ! # = bio/tree.rb - phylogenetic tree data structure class # # Copyright:: Copyright (C) 2006 *************** *** 21,25 **** # # This is alpha version. Incompatible changes may be made frequently. ! class PhylogeneticTree # Error when there are no path between specified nodes --- 21,25 ---- # # This is alpha version. Incompatible changes may be made frequently. ! class Tree # Error when there are no path between specified nodes *************** *** 255,259 **** # Creates a new phylogenetic tree. # When no arguments are given, it creates a new empty tree. ! # When a PhylogeneticTree object is given, it copies the tree. # Note that the new tree shares Node and Edge objects # with the given tree. --- 255,259 ---- # Creates a new phylogenetic tree. # When no arguments are given, it creates a new empty tree. ! # When a Tree object is given, it copies the tree. # Note that the new tree shares Node and Edge objects # with the given tree. *************** *** 499,503 **** # _nodes_ must be an array of nodes. # Nodes that do not exist in the original tree are ignored. ! # Returns a PhylogeneticTree object. # Note that the sub-tree shares Node and Edge objects # with the original tree. --- 499,503 ---- # _nodes_ must be an array of nodes. # Nodes that do not exist in the original tree are ignored. ! # Returns a Tree object. # Note that the sub-tree shares Node and Edge objects # with the original tree. *************** *** 524,528 **** # _nodes_ must be an array of nodes. # Nodes that do not exist in the original tree are ignored. ! 
# Returns a PhylogeneticTree object. # The result is unspecified for cyclic trees. # Note that the sub-tree shares Node and Edge objects --- 524,528 ---- # _nodes_ must be an array of nodes. # Nodes that do not exist in the original tree are ignored. ! # Returns a Tree object. # The result is unspecified for cyclic trees. # Note that the sub-tree shares Node and Edge objects *************** *** 551,555 **** # If the same edge exists, the edge in _other_ is used. # Returns self. ! # The result is unspecified if _other_ isn't a PhylogeneticTree object. # Note that the Node and Edge objects in the _other_ tree are # shared in the concatinated tree. --- 551,555 ---- # If the same edge exists, the edge in _other_ is used. # Returns self. ! # The result is unspecified if _other_ isn't a Tree object. # Note that the Node and Edge objects in the _other_ tree are # shared in the concatinated tree. *************** *** 816,820 **** self end ! end #class PhylogeneticTree end #module Bio --- 816,820 ---- self end ! end #class Tree end #module Bio From ngoto at dev.open-bio.org Wed Dec 13 11:29:39 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:29:39 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_tree.rb,1.2,1.3 Message-ID: <200612131629.kBDGTd1C008854@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory dev.open-bio.org:/tmp/cvs-serv8729/test/unit/bio Modified Files: test_tree.rb Log Message: Bio::PhylogeneticTree is renamed to Bio::Tree and filenames are also renamed from phylogenetictree.rb to tree.rb and from test_phylogenetictree.rb to test_tree.rb. 
Index: test_tree.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/test_tree.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** test_tree.rb 6 Oct 2006 14:18:51 -0000 1.2 --- test_tree.rb 13 Dec 2006 16:29:37 -0000 1.3 *************** *** 1,4 **** # ! # = test/bio/test_phylogenetictree.rb - unit test for Bio::PhylogeneticTree # # Copyright:: Copyright (C) 2006 --- 1,4 ---- # ! # = test/bio/test_tree.rb - unit test for Bio::Tree # # Copyright:: Copyright (C) 2006 *************** *** 16,31 **** require 'bio' ! require 'bio/phylogenetictree' module Bio ! class TestPhylogeneticTreeEdge < Test::Unit::TestCase def setup ! @obj = Bio::PhylogeneticTree::Edge.new(123.45) end def test_initialize ! assert_nothing_raised { Bio::PhylogeneticTree::Edge.new } ! assert_equal(1.23, Bio::PhylogeneticTree::Edge.new(1.23).distance) ! assert_equal(12.3, Bio::PhylogeneticTree::Edge.new('12.3').distance) end --- 16,31 ---- require 'bio' ! require 'bio/tree' module Bio ! class TestTreeEdge < Test::Unit::TestCase def setup ! @obj = Bio::Tree::Edge.new(123.45) end def test_initialize ! assert_nothing_raised { Bio::Tree::Edge.new } ! assert_equal(1.23, Bio::Tree::Edge.new(1.23).distance) ! assert_equal(12.3, Bio::Tree::Edge.new('12.3').distance) end *************** *** 63,77 **** assert_equal("123.45", @obj.to_s) end ! end #class TestPhylogeneticTreeEdge ! class TestPhylogeneticTreeNode < Test::Unit::TestCase def setup ! @obj = Bio::PhylogeneticTree::Node.new end def test_initialize ! assert_nothing_raised { Bio::PhylogeneticTree::Node.new } a = nil ! assert_nothing_raised { a = Bio::PhylogeneticTree::Node.new('mouse') } assert_equal('mouse', a.name) end --- 63,77 ---- assert_equal("123.45", @obj.to_s) end ! end #class TestTreeEdge ! class TestTreeNode < Test::Unit::TestCase def setup ! @obj = Bio::Tree::Node.new end def test_initialize ! 
assert_nothing_raised { Bio::Tree::Node.new } a = nil ! assert_nothing_raised { a = Bio::Tree::Node.new('mouse') } assert_equal('mouse', a.name) end *************** *** 123,137 **** assert_equal('human', @obj.to_s) end ! end #class TestPhylogeneticTreeNode ! class TestPhylogeneticTree < Test::Unit::TestCase def setup ! @tree = Bio::PhylogeneticTree.new end def test_get_edge_distance ! edge = Bio::PhylogeneticTree::Edge.new assert_equal(nil, @tree.get_edge_distance(edge)) ! edge = Bio::PhylogeneticTree::Edge.new(12.34) assert_equal(12.34, @tree.get_edge_distance(edge)) assert_equal(12.34, @tree.get_edge_distance(12.34)) --- 123,137 ---- assert_equal('human', @obj.to_s) end ! end #class TestTreeNode ! class TestTree < Test::Unit::TestCase def setup ! @tree = Bio::Tree.new end def test_get_edge_distance ! edge = Bio::Tree::Edge.new assert_equal(nil, @tree.get_edge_distance(edge)) ! edge = Bio::Tree::Edge.new(12.34) assert_equal(12.34, @tree.get_edge_distance(edge)) assert_equal(12.34, @tree.get_edge_distance(12.34)) *************** *** 139,145 **** def test_get_edge_distance_string ! edge = Bio::PhylogeneticTree::Edge.new assert_equal(nil, @tree.get_edge_distance_string(edge)) ! edge = Bio::PhylogeneticTree::Edge.new(12.34) assert_equal("12.34", @tree.get_edge_distance_string(edge)) assert_equal("12.34", @tree.get_edge_distance_string(12.34)) --- 139,145 ---- def test_get_edge_distance_string ! edge = Bio::Tree::Edge.new assert_equal(nil, @tree.get_edge_distance_string(edge)) ! edge = Bio::Tree::Edge.new(12.34) assert_equal("12.34", @tree.get_edge_distance_string(edge)) assert_equal("12.34", @tree.get_edge_distance_string(12.34)) *************** *** 147,151 **** def test_get_node_name ! node = Bio::PhylogeneticTree::Node.new assert_equal(nil, @tree.get_node_name(node)) node.name = 'human' --- 147,151 ---- def test_get_node_name ! 
node = Bio::Tree::Node.new assert_equal(nil, @tree.get_node_name(node)) node.name = 'human' *************** *** 154,159 **** def test_initialize ! assert_nothing_raised { Bio::PhylogeneticTree.new } ! assert_nothing_raised { Bio::PhylogeneticTree.new(@tree) } end --- 154,159 ---- def test_initialize ! assert_nothing_raised { Bio::Tree.new } ! assert_nothing_raised { Bio::Tree.new(@tree) } end *************** *** 164,168 **** def test_root=() assert_equal(nil, @tree.root) ! node = Bio::PhylogeneticTree::Node.new @tree.root = node assert_equal(node, @tree.root) --- 164,168 ---- def test_root=() assert_equal(nil, @tree.root) ! node = Bio::Tree::Node.new @tree.root = node assert_equal(node, @tree.root) *************** *** 175,199 **** end ! end #class TestPhylogeneticTree ! class TestPhylogeneticTree2 < Test::Unit::TestCase def setup # Note that below data is NOT real. The distances are random. ! @tree = Bio::PhylogeneticTree.new ! @mouse = Bio::PhylogeneticTree::Node.new('mouse') ! @rat = Bio::PhylogeneticTree::Node.new('rat') ! @rodents = Bio::PhylogeneticTree::Node.new('rodents') ! @human = Bio::PhylogeneticTree::Node.new('human') ! @chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') ! @primates = Bio::PhylogeneticTree::Node.new('primates') ! @mammals = Bio::PhylogeneticTree::Node.new('mammals') @nodes = [ @mouse, @rat, @rodents, @human, @chimpanzee, @primates, @mammals ] ! @edge_rodents_mouse = Bio::PhylogeneticTree::Edge.new(0.0968) ! @edge_rodents_rat = Bio::PhylogeneticTree::Edge.new(0.1125) ! @edge_mammals_rodents = Bio::PhylogeneticTree::Edge.new(0.2560) ! @edge_primates_human = Bio::PhylogeneticTree::Edge.new(0.0386) ! @edge_primates_chimpanzee = Bio::PhylogeneticTree::Edge.new(0.0503) ! @edge_mammals_primates = Bio::PhylogeneticTree::Edge.new(0.2235) @edges = [ [ @rodents, @mouse, @edge_rodents_mouse ], --- 175,199 ---- end ! end #class TestTree ! class TestTree2 < Test::Unit::TestCase def setup # Note that below data is NOT real. 
The distances are random. ! @tree = Bio::Tree.new ! @mouse = Bio::Tree::Node.new('mouse') ! @rat = Bio::Tree::Node.new('rat') ! @rodents = Bio::Tree::Node.new('rodents') ! @human = Bio::Tree::Node.new('human') ! @chimpanzee = Bio::Tree::Node.new('chimpanzee') ! @primates = Bio::Tree::Node.new('primates') ! @mammals = Bio::Tree::Node.new('mammals') @nodes = [ @mouse, @rat, @rodents, @human, @chimpanzee, @primates, @mammals ] ! @edge_rodents_mouse = Bio::Tree::Edge.new(0.0968) ! @edge_rodents_rat = Bio::Tree::Edge.new(0.1125) ! @edge_mammals_rodents = Bio::Tree::Edge.new(0.2560) ! @edge_primates_human = Bio::Tree::Edge.new(0.0386) ! @edge_primates_chimpanzee = Bio::Tree::Edge.new(0.0503) ! @edge_mammals_primates = Bio::Tree::Edge.new(0.2235) @edges = [ [ @rodents, @mouse, @edge_rodents_mouse ], *************** *** 263,267 **** @tree.adjacent_nodes(@mammals).sort(&@by_id)) # test for not existed nodes ! assert_equal([], @tree.adjacent_nodes(Bio::PhylogeneticTree::Node.new)) end --- 263,267 ---- @tree.adjacent_nodes(@mammals).sort(&@by_id)) # test for not existed nodes ! assert_equal([], @tree.adjacent_nodes(Bio::Tree::Node.new)) end *************** *** 314,318 **** # test for not existed nodes ! assert_equal([], @tree.out_edges(Bio::PhylogeneticTree::Node.new)) end --- 314,318 ---- # test for not existed nodes ! assert_equal([], @tree.out_edges(Bio::Tree::Node.new)) end *************** *** 397,401 **** # test for not existed nodes flag = nil ! node = Bio::PhylogeneticTree::Node.new r = @tree.each_out_edge(node) do |src, tgt, edge| flag = true --- 397,401 ---- # test for not existed nodes flag = nil ! node = Bio::Tree::Node.new r = @tree.each_out_edge(node) do |src, tgt, edge| flag = true *************** *** 405,409 **** end ! end #class TestPhylogeneticTree2 end #module Bio --- 405,409 ---- end ! 
end #class TestTree2 end #module Bio From ngoto at dev.open-bio.org Wed Dec 13 11:58:41 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 16:58:41 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio alignment.rb,1.16,1.17 Message-ID: <200612131658.kBDGwfYL009269@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv9231/lib/bio Modified Files: alignment.rb Log Message: changed RDoc Index: alignment.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/alignment.rb,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** alignment.rb 30 Apr 2006 05:56:40 -0000 1.16 --- alignment.rb 13 Dec 2006 16:58:39 -0000 1.17 *************** *** 2,7 **** # = bio/alignment.rb - multiple alignment of sequences # ! # Copyright:: Copyright (C) 2003, 2005 ! # GOTO Naohisa # # License:: Ruby's --- 2,7 ---- # = bio/alignment.rb - multiple alignment of sequences # ! # Copyright:: Copyright (C) 2003, 2005, 2006 ! # GOTO Naohisa # # License:: Ruby's *************** *** 26,69 **** module Bio ! =begin rdoc ! ! = About Bio::Alignment ! ! Bio::Alignment is a namespace of classes/modules for multiple sequence ! alignment. ! ! = Multiple alignment container classes ! ! == Bio::Alignment::OriginalAlignment ! ! == Bio::Alignment::SequenceArray ! ! == Bio::Alignment::SequenceHash ! ! = Bio::Alignment::Site ! ! = Modules ! ! == Bio::Alignment::EnumerableExtension ! ! Mix-in for classes included Enumerable. ! ! == Bio::Alignment::ArrayExtension ! ! Mix-in for Array or Array-like classes. ! ! == Bio::Alignment::HashExtension ! ! Mix-in for Hash or Hash-like classes. ! ! == Bio::Alignment::SiteMethods ! ! == Bio::Alignment::PropertyMethods ! ! = Bio::Alignment::GAP ! ! = Compatibility from older BioRuby ! ! =end module Alignment --- 26,67 ---- module Bio ! # ! # = About Bio::Alignment ! # ! 
# Bio::Alignment is a namespace of classes/modules for multiple sequence ! # alignment. ! # ! # = Multiple alignment container classes ! # ! # == Bio::Alignment::OriginalAlignment ! # ! # == Bio::Alignment::SequenceArray ! # ! # == Bio::Alignment::SequenceHash ! # ! # = Bio::Alignment::Site ! # ! # = Modules ! # ! # == Bio::Alignment::EnumerableExtension ! # ! # Mix-in for classes included Enumerable. ! # ! # == Bio::Alignment::ArrayExtension ! # ! # Mix-in for Array or Array-like classes. ! # ! # == Bio::Alignment::HashExtension ! # ! # Mix-in for Hash or Hash-like classes. ! # ! # == Bio::Alignment::SiteMethods ! # ! # == Bio::Alignment::PropertyMethods ! # ! # = Bio::Alignment::GAP ! # ! # = Compatibility from older BioRuby ! # module Alignment From ngoto at dev.open-bio.org Wed Dec 13 12:29:20 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Wed, 13 Dec 2006 17:29:20 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db newick.rb,1.4,1.5 Message-ID: <200612131729.kBDHTKrR010219@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory dev.open-bio.org:/tmp/cvs-serv10199/lib/bio/db Modified Files: newick.rb Log Message: output(:NHX) is changed to output(:nhx) Index: newick.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/newick.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** newick.rb 13 Dec 2006 16:29:37 -0000 1.4 --- newick.rb 13 Dec 2006 17:29:17 -0000 1.5 *************** *** 199,208 **** # Returns formatted text (or something) of the tree ! # Currently supported format is: :newick, :NHX def output(format, *arg, &block) case format when :newick output_newick(*arg, &block) ! when :NHX output_nhx(*arg, &block) else --- 199,208 ---- # Returns formatted text (or something) of the tree ! # Currently supported format is: :newick, :nhx def output(format, *arg, &block) case format when :newick output_newick(*arg, &block) ! 
when :nhx output_nhx(*arg, &block) else From ngoto at dev.open-bio.org Thu Dec 14 07:39:48 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 12:39:48 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio alignment.rb,1.17,1.18 Message-ID: <200612141239.kBECdmPI013100@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv13080/lib/bio Modified Files: alignment.rb Log Message: Bio::Alignment::ClustalWFormatter was removed and methods were renamed and moved to Bio::Alignment::Output. Output of Phylip interleaved and non-interleaved and Molphy multiple alignment formats are supported. Some bug fix about ClustalW output about SequenceHash. Some changes in SequenceHash. Bio::Alignment::EnumerableExtension#sequence_names are newly added. to_fasta and to_clustal methods are now obsoleted. Instead, please use output methods. Index: alignment.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/alignment.rb,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** alignment.rb 13 Dec 2006 16:58:39 -0000 1.17 --- alignment.rb 14 Dec 2006 12:39:45 -0000 1.18 *************** *** 623,627 **** elsif seqclass == Bio::Sequence::NA then amino = false ! elsif self.find { |x| /[EFILPQ]/i =~ x } then amino = true else --- 623,627 ---- elsif seqclass == Bio::Sequence::NA then amino = false ! elsif self.each_seq { |x| /[EFILPQ]/i =~ x } then amino = true else *************** *** 856,869 **** end #module EnumerableExtension ! # ClustalWFormatter is a module to create ClustalW-formatted text ! # from an alignment object. ! # ! # It will be obsoleted and the methods will be frequently changed. ! module ClustalWFormatter ! # Check whether there are same names. # # array:: names of the sequences (array of string) # len:: length to check (default:30) ! 
def have_same_name?(array, len = 30) na30 = array.collect do |k| k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/\:\;\,\(\)/, '_').to_s --- 856,882 ---- end #module EnumerableExtension ! module Output ! def output(format, *arg) ! case format ! when :clustal ! output_clustal(*arg) ! when :fasta ! output_fasta(*arg) ! when :phylip ! output_phylip(*arg) ! when :phylipnon ! output_phylipnon(*arg) ! when :molphy ! output_molphy(*arg) ! else ! raise "Unknown format: #{format.inspect}" ! end ! end ! ! # Check whether there are same names for ClustalW format. # # array:: names of the sequences (array of string) # len:: length to check (default:30) ! def __clustal_have_same_name?(array, len = 30) na30 = array.collect do |k| k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/\:\;\,\(\)/, '_').to_s *************** *** 892,904 **** end end ! private :have_same_name? ! # Changes sequence names if there are conflicted names. # # array:: names of the sequences (array of string) # len:: length to check (default:30) ! def avoid_same_name(array, len = 30) na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') } ! if dupidx = have_same_name?(na, len) procs = [ Proc.new { |s, i| --- 905,918 ---- end end ! private :__clustal_have_same_name? ! # Changes sequence names if there are conflicted names ! # for ClustalW format. # # array:: names of the sequences (array of string) # len:: length to check (default:30) ! def __clustal_avoid_same_name(array, len = 30) na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') } ! if dupidx = __clustal_have_same_name?(na, len) procs = [ Proc.new { |s, i| *************** *** 914,918 **** na[i] = pr.call(s.to_s, i) end ! dupidx = have_same_name?(na, len) break unless dupidx end --- 928,932 ---- na[i] = pr.call(s.to_s, i) end ! dupidx = __clustal_have_same_name?(na, len) break unless dupidx end *************** *** 925,929 **** na end ! private :avoid_same_name # Generates ClustalW-formatted text --- 939,943 ---- na end ! 
private :__clustal_avoid_same_name # Generates ClustalW-formatted text *************** *** 931,935 **** # names:: names of the sequences # options:: options ! def clustalw_formatter(seqs, names, options = {}) #(original) aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ] --- 945,949 ---- # names:: names of the sequences # options:: options ! def __clustal_formatter(seqs, names, options = {}) #(original) aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ] *************** *** 946,950 **** end if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] ! sn = avoid_same_name(sn) end --- 960,964 ---- end if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] ! sn = __clustal_avoid_same_name(sn) end *************** *** 971,976 **** mline = (options[:match_line] or seqs.match_line(mopt)) ! aseqs = seqs.collect do |s| ! s.to_s.gsub(seqs.gap_regexp, gchar) end case options[:case].to_s --- 985,991 ---- mline = (options[:match_line] or seqs.match_line(mopt)) ! aseqs = Array.new(seqs.size).clear ! seqs.each_seq do |s| ! aseqs << s.to_s.gsub(seqs.gap_regexp, gchar) end case options[:case].to_s *************** *** 1006,1012 **** aln.join('') end ! private :clustalw_formatter ! end #module ClustalWFormatter # Bio::Alignment::ArrayExtension is a set of useful methods for --- 1021,1190 ---- aln.join('') end ! private :__clustal_formatter ! ! # Generates ClustalW-formatted text ! # seqs:: sequences (must be an alignment object) ! # names:: names of the sequences ! # options:: options ! def output_clustal(options = {}) ! __clustal_formatter(self, self.sequence_names, options) ! end ! ! # to_clustal is deprecated. Instead, please use output_clustal. ! #--- ! #alias to_clustal output_clustal ! #+++ ! def to_clustal(*arg) ! warn "to_clustal is deprecated. Please use output_clustal." ! output_clustal(*arg) ! end ! ! # Generates fasta format text and returns a string. ! def output_fasta(options={}) ! #(original) ! width = (options[:width] or 70) ! 
if options[:avoid_same_name] then ! na = __clustal_avoid_same_name(self.sequence_names, 30) ! else ! na = self.sequence_names.collect do |k| ! k.to_s.gsub(/[\r\n\x00]/, ' ') ! end ! end ! if width and width > 0 then ! w_reg = Regexp.new(".{1,#{width}}") ! self.collect do |s| ! ">#{na.shift}\n" + s.to_s.gsub(w_reg, "\\0\n") ! end.join('') ! else ! self.collect do |s| ! ">#{na.shift}\n" + s.to_s + "\n" ! end.join('') ! end ! end ! ! # generates phylip interleaved alignment format as a string ! def output_phylip(options = {}) ! aln, aseqs, lines = __output_phylip_common(options) ! lines.times do ! aseqs.each { |a| aln << a.shift } ! aln << "\n" ! end ! aln.pop if aln[-1] == "\n" ! aln.join('') ! end ! ! # generates Phylip3.2 (old) non-interleaved format as a string ! def output_phylipnon(options = {}) ! aln, aseqs, lines = __output_phylip_common(options) ! aln.first + aseqs.join('') ! end + # common routine for interleaved/non-interleaved phylip format + def __output_phylip_common(options = {}) + len = self.alignment_length + aln = [ " #{self.size} #{len}\n" ] + sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } + if options[:replace_space] + sn.collect! { |x| x.gsub(/\s/, '_') } + end + if !options.has_key?(:escape) or options[:escape] + sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } + end + if !options.has_key?(:split) or options[:split] + sn.collect! { |x| x.split(/\s/)[0].to_s } + end + if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] + sn = __clustal_avoid_same_name(sn, 10) + end + + namewidth = 10 + seqwidth = (options[:width] or 60) + seqwidth = seqwidth.div(10) * 10 + seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})") + gchar = (options[:gap_char] or '-') + + aseqs = Array.new(len).clear + self.each_seq do |s| + aseqs << s.to_s.gsub(self.gap_regexp, gchar) + end + case options[:case].to_s + when /lower/i + aseqs.each { |s| s.downcase! } + when /upper/i + aseqs.each { |s| s.upcase! } + end + + aseqs.collect! 
do |s| + snx = sn.shift + head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth] + head2 = ' ' * namewidth + s << (gchar * (len - s.length)) + s.gsub!(/(.{1,10})/n, " \\1") + s.gsub!(seqregexp, "\\1\n") + a = s.split(/^/) + head += a.shift + ret = a.collect { |x| head2 + x } + ret.unshift(head) + ret + end + lines = (len + seqwidth - 1).div(seqwidth) + [ aln, aseqs, lines ] + end + + # Generates Molphy alignment format text as a string + def output_molphy(options = {}) + len = self.alignment_length + header = "#{self.size} #{len}\n" + sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } + if options[:replace_space] + sn.collect! { |x| x.gsub(/\s/, '_') } + end + if !options.has_key?(:escape) or options[:escape] + sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } + end + if !options.has_key?(:split) or options[:split] + sn.collect! { |x| x.split(/\s/)[0].to_s } + end + if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] + sn = __clustal_avoid_same_name(sn, 30) + end + + seqwidth = (options[:width] or 60) + seqregexp = Regexp.new("(.{1,#{seqwidth}})") + gchar = (options[:gap_char] or '-') + + aseqs = Array.new(len).clear + self.each_seq do |s| + aseqs << s.to_s.gsub(self.gap_regexp, gchar) + end + case options[:case].to_s + when /lower/i + aseqs.each { |s| s.downcase! } + when /upper/i + aseqs.each { |s| s.upcase! } + end + + aseqs.collect! do |s| + s << (gchar * (len - s.length)) + s.gsub!(seqregexp, "\\1\n") + sn.shift + "\n" + s + end + aseqs.unshift(header) + aseqs.join('') + end + end #module Output + + module EnumerableExtension + include Output + + # Returns an array of sequence names. + # The order of the names must be the same as + # the order of each_seq. 
+ def sequence_names + i = 0 + self.each_seq { |s| i += 1 } + (0...i).to_a + end + end #module EnumerableExtension # Bio::Alignment::ArrayExtension is a set of useful methods for *************** *** 1028,1037 **** each(&block) end - - include ClustalWFormatter - # Returns a string of Clustal W formatted text of the alignment. - def to_clustal(options = {}) - clustalw_formatter(self, (0...(self.size)).to_a, options) - end end #module ArrayExtension --- 1206,1209 ---- *************** *** 1060,1065 **** # # It works the same as Hash#each_value. ! def each_seq(&block) #:yields: seq ! each_value(&block) end --- 1232,1238 ---- # # It works the same as Hash#each_value. ! def each_seq #:yields: seq ! #each_value(&block) ! each_key { |k| yield self[k] } end *************** *** 1123,1135 **** end ! include ClustalWFormatter ! # Returns a string of Clustal W formatted text of the alignment. ! def to_clustal(options = {}) ! seqs = SequenceArray.new ! names = self.keys ! names.each do |k| ! seqs << self[k] ! end ! clustalw_formatter(seqs, names, options) end end #module HashExtension --- 1296,1304 ---- end ! # Returns an array of sequence names. ! # The order of the names must be the same as ! # the order of each_seq. ! def sequence_names ! self.keys end end #module HashExtension *************** *** 1783,1787 **** width = options[:width] unless width if options[:avoid_same_name] then ! na = avoid_same_name(self.keys, 30) else na = self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') } --- 1952,1956 ---- width = options[:width] unless width if options[:avoid_same_name] then ! na = __clustal_avoid_same_name(self.keys, 30) else na = self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') } *************** *** 1814,1828 **** # # The specification of the argument will be changed. def to_fasta(*arg) #(original) self.to_fasta_array(*arg).join('') end - include ClustalWFormatter - # Returns a string of Clustal W formatted text of the alignment. 
- def to_clustal(options = {}) - clustalw_formatter(self, self.keys, options) - end - # The method name consensus will be obsoleted. # Please use consensus_string instead. --- 1983,1995 ---- # # The specification of the argument will be changed. + # + # Note: to_fasta is deprecated. + # Please use output_fasta instead. def to_fasta(*arg) #(original) + warn "to_fasta is deprecated. Please use output_fasta." self.to_fasta_array(*arg).join('') end # The method name consensus will be obsoleted. # Please use consensus_string instead. From ngoto at dev.open-bio.org Thu Dec 14 09:10:59 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 14:10:59 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_alignment.rb,1.7,1.8 Message-ID: <200612141410.kBEEAxCp013352@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory dev.open-bio.org:/tmp/cvs-serv13312/test/unit/bio Modified Files: test_alignment.rb Log Message: Unit tests changed following the changes of Bio::Alignment. Index: test_alignment.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/test_alignment.rb,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** test_alignment.rb 24 Jan 2006 14:11:34 -0000 1.7 --- test_alignment.rb 14 Dec 2006 14:10:57 -0000 1.8 *************** *** 545,576 **** end #class TestAlignmentEnumerableExtension ! class TestAlignmentClustalWFormatter < Test::Unit::TestCase def setup @obj = Object.new ! @obj.extend(Alignment::ClustalWFormatter) end ! def test_have_same_name_true assert_equal([ 0, 1 ], @obj.instance_eval { ! have_same_name?([ 'ATP ATG', 'ATP ATA', 'BBB' ]) }) end def test_have_same_name_false assert_equal(false, @obj.instance_eval { ! have_same_name?([ 'GTP ATG', 'ATP ATA', 'BBB' ]) }) end def test_avoid_same_name assert_equal([ 'ATP_ATG', 'ATP_ATA', 'BBB' ], ! @obj.instance_eval { ! 
avoid_same_name([ 'ATP ATG', 'ATP ATA', 'BBB' ]) }) end def test_avoid_same_name_numbering assert_equal([ '0_ATP', '1_ATP', '2_BBB' ], ! @obj.instance_eval { ! avoid_same_name([ 'ATP', 'ATP', 'BBB' ]) }) end ! end #class TestAlignmentClustalWFormatter --- 545,577 ---- end #class TestAlignmentEnumerableExtension ! class TestAlignmentOutput < Test::Unit::TestCase def setup @obj = Object.new ! @obj.extend(Alignment::Output) end ! def test_clustal_have_same_name_true assert_equal([ 0, 1 ], @obj.instance_eval { ! __clustal_have_same_name?([ 'ATP ATG', 'ATP ATA', 'BBB' ]) }) end def test_have_same_name_false assert_equal(false, @obj.instance_eval { ! __clustal_have_same_name?([ 'GTP ATG', 'ATP ATA', 'BBB' ]) }) end def test_avoid_same_name assert_equal([ 'ATP_ATG', 'ATP_ATA', 'BBB' ], ! @obj.instance_eval { ! __clustal_avoid_same_name([ 'ATP ATG', 'ATP ATA', 'BBB' ]) }) end + def test_avoid_same_name_numbering assert_equal([ '0_ATP', '1_ATP', '2_BBB' ], ! @obj.instance_eval { ! __clustal_avoid_same_name([ 'ATP', 'ATP', 'BBB' ]) }) end ! end #class TestAlignmentOutput From ngoto at dev.open-bio.org Thu Dec 14 09:11:56 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 14:11:56 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio alignment.rb,1.18,1.19 Message-ID: <200612141411.kBEEBuxZ013380@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv13360/lib/bio Modified Files: alignment.rb Log Message: fixed mistaken amino distinguishing routine in match_line. 
Index: alignment.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/alignment.rb,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** alignment.rb 14 Dec 2006 12:39:45 -0000 1.18 --- alignment.rb 14 Dec 2006 14:11:54 -0000 1.19 *************** *** 623,630 **** elsif seqclass == Bio::Sequence::NA then amino = false - elsif self.each_seq { |x| /[EFILPQ]/i =~ x } then - amino = true else amino = nil end end --- 623,634 ---- elsif seqclass == Bio::Sequence::NA then amino = false else amino = nil + self.each_seq do |x| + if /[EFILPQ]/i =~ x + amino = true + break + end + end end end From ngoto at dev.open-bio.org Thu Dec 14 09:54:53 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 14:54:53 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb, 1.11, 1.12 mafft.rb, 1.11, 1.12 sim4.rb, 1.6, 1.7 Message-ID: <200612141454.kBEEsqG1013493@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory dev.open-bio.org:/tmp/cvs-serv13473/lib/bio/appl Modified Files: clustalw.rb mafft.rb sim4.rb Log Message: Changed to use Bio::Command in bio/command.rb instead of Open3.popen3. Bio::(ClustalW|MAFFT|Sim4)#option is changed to #options. Bio::ClustalW::errorlog and Bio::(MAFFT|Sim4)#log are deprecated and there are no replacements for the methods. Index: sim4.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sim4.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** sim4.rb 30 Apr 2006 05:50:19 -0000 1.6 --- sim4.rb 14 Dec 2006 14:54:50 -0000 1.7 *************** *** 16,21 **** # - require 'open3' require 'tempfile' module Bio --- 16,21 ---- # require 'tempfile' + require 'bio/command' module Bio *************** *** 30,41 **** # [+database+] Default file name of database('seq2'). # [+option+] Options (array of strings). ! 
def initialize(program = 'sim4', database = nil, option = []) @program = program ! @option = option @database = database #seq2 @command = nil @output = nil @report = nil - @log = nil end --- 30,40 ---- # [+database+] Default file name of database('seq2'). # [+option+] Options (array of strings). ! def initialize(program = 'sim4', database = nil, opt = []) @program = program ! @options = opt @database = database #seq2 @command = nil @output = nil @report = nil end *************** *** 47,57 **** # options ! attr_reader :option # last command-line strings executed by the object attr_reader :command # last messages of program reported to the STDERR ! attr_reader :log # last result text (String) --- 46,70 ---- # options ! attr_accessor :options ! ! # option is deprecated. Instead, please use options. ! def option ! warn "option is deprecated. Please use options." ! options ! end # last command-line strings executed by the object attr_reader :command + #--- # last messages of program reported to the STDERR ! #attr_reader :log ! #+++ ! ! #log is deprecated (no replacement) and returns empty string. ! def log ! warn "log is deprecated (no replacement) and returns empty string." ! '' ! end # last result text (String) *************** *** 97,112 **** @command = [ @program, filename1, (filename2 or @database), *@option ] @output = nil - @log = nil @report = nil ! Open3.popen3(*@command) do |din, dout, derr| ! din.close ! derr.sync = true ! t = Thread.start { @log = derr.read } ! begin ! @output = dout.read ! @report = Bio::Sim4::Report.new(@output) ! ensure ! t.join ! end end @report --- 110,118 ---- @command = [ @program, filename1, (filename2 or @database), *@option ] @output = nil @report = nil ! Bio::Command.call_command(*@command) do |io| ! io.close_write ! @output = io.read ! 
@report = Bio::Sim4::Report.new(@output) end @report Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** clustalw.rb 30 Apr 2006 05:50:19 -0000 1.11 --- clustalw.rb 14 Dec 2006 14:54:50 -0000 1.12 *************** *** 24,29 **** require 'tempfile' - require 'open3' require 'bio/sequence' require 'bio/alignment' --- 24,29 ---- require 'tempfile' + require 'bio/command' require 'bio/sequence' require 'bio/alignment' *************** *** 39,45 **** # Creates a new CLUSTAL W execution wrapper object (alignment factory). ! def initialize(program = 'clustalw', option = []) @program = program ! @option = option @command = nil @output = nil --- 39,45 ---- # Creates a new CLUSTAL W execution wrapper object (alignment factory). ! def initialize(program = 'clustalw', opt = []) @program = program ! @options = opt @command = nil @output = nil *************** *** 52,56 **** # options ! attr_accessor :option # Returns last command-line strings executed by this factory. --- 52,62 ---- # options ! attr_accessor :options ! ! # option is deprecated. Instead, please use options. ! def option ! warn "option is deprecated. Please use options." ! options ! end # Returns last command-line strings executed by this factory. *************** *** 144,149 **** attr_reader :output_dnd # Returns last error messages (to stderr) of CLUSTAL W execution. ! attr_reader :errorlog private --- 150,162 ---- attr_reader :output_dnd + #--- # Returns last error messages (to stderr) of CLUSTAL W execution. ! #attr_reader :errorlog ! #+++ ! #errorlog is deprecated (no replacement) and returns empty string. ! def errorlog ! warn "errorlog is deprecated (no replacement) and returns empty string." ! '' ! end private *************** *** 154,170 **** @log = nil ! Open3.popen3(*@command) do |din, dout, derr| ! din.close ! 
t = Thread.start do ! @errorlog = derr.read ! end @log = dout.read t.join end - # @command_string = @command.join(" ") - # IO.popen(@command, "r") do |io| - # io.sync = true - # @log = io.read - # end @log end --- 167,175 ---- @log = nil ! Bio::Command.call_command(*@command) do |io| ! io.close_write @log = dout.read t.join end @log end Index: mafft.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft.rb,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** mafft.rb 25 Sep 2006 08:09:22 -0000 1.11 --- mafft.rb 14 Dec 2006 14:54:50 -0000 1.12 *************** *** 24,36 **** # require 'bio/db/fasta' require 'bio/io/flatfile' - #-- - # We use Open3.popen3, because MAFFT on win32 requires Cygwin. - #++ - require 'open3' - require 'tempfile' - module Bio --- 24,34 ---- # + require 'tempfile' + + require 'bio/command' + require 'bio/db/fasta' require 'bio/io/flatfile' module Bio *************** *** 108,118 **** # +program+ is the name of the program. # +opt+ is options of the program. ! def initialize(program, option) @program = program ! @option = option @command = nil @output = nil @report = nil - @log = nil end --- 106,115 ---- # +program+ is the name of the program. # +opt+ is options of the program. ! def initialize(program, opt) @program = program ! @options = opt @command = nil @output = nil @report = nil end *************** *** 121,125 **** # options ! attr_accessor :option # Shows last command-line string. Returns nil or an array of String. --- 118,128 ---- # options ! attr_accessor :options ! ! # option is deprecated. Instead, please use options. ! def option ! warn "option is deprecated. Please use options." ! options ! end # Shows last command-line string. Returns nil or an array of String. *************** *** 128,133 **** attr_reader :command # last message to STDERR when executing the program. ! attr_reader :log # Shows latest raw alignment result. 
--- 131,144 ---- attr_reader :command + #--- # last message to STDERR when executing the program. ! #attr_reader :log ! #+++ ! ! #log is deprecated (no replacement) and returns empty string. ! def log ! warn "log is deprecated (no replacement) and returns empty string." ! '' ! end # Shows latest raw alignment result. *************** *** 189,204 **** #STDERR.print "DEBUG: ", @command.join(" "), "\n" @output = nil ! @log = nil ! Open3.popen3(*@command) do |din, dout, derr| ! din.close ! derr.sync = true ! t = Thread.start do ! @log = derr.read ! end ! ff = Bio::FlatFile.new(Bio::FastaFormat, dout) @output = ff.to_a - t.join end - @log end --- 200,208 ---- #STDERR.print "DEBUG: ", @command.join(" "), "\n" @output = nil ! Bio::Command.call_command(*@command) do |io| ! io.close_write ! ff = Bio::FlatFile.new(Bio::FastaFormat, io) @output = ff.to_a end end From ngoto at dev.open-bio.org Thu Dec 14 10:09:01 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 15:09:01 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio alignment.rb,1.19,1.20 Message-ID: <200612141509.kBEF91un013590@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv13570/lib/bio Modified Files: alignment.rb Log Message: EnumerableExtension#number_of_sequences are added and some methods are modified to use it. Index: alignment.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/alignment.rb,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** alignment.rb 14 Dec 2006 14:11:54 -0000 1.19 --- alignment.rb 14 Dec 2006 15:08:59 -0000 1.20 *************** *** 989,993 **** mline = (options[:match_line] or seqs.match_line(mopt)) ! aseqs = Array.new(seqs.size).clear seqs.each_seq do |s| aseqs << s.to_s.gsub(seqs.gap_regexp, gchar) --- 989,993 ---- mline = (options[:match_line] or seqs.match_line(mopt)) ! 
aseqs = Array.new(seqs.number_of_sequences).clear seqs.each_seq do |s| aseqs << s.to_s.gsub(seqs.gap_regexp, gchar) *************** *** 1087,1091 **** def __output_phylip_common(options = {}) len = self.alignment_length ! aln = [ " #{self.size} #{len}\n" ] sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } if options[:replace_space] --- 1087,1091 ---- def __output_phylip_common(options = {}) len = self.alignment_length ! aln = [ " #{self.number_of_sequences} #{len}\n" ] sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } if options[:replace_space] *************** *** 1108,1112 **** gchar = (options[:gap_char] or '-') ! aseqs = Array.new(len).clear self.each_seq do |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) --- 1108,1112 ---- gchar = (options[:gap_char] or '-') ! aseqs = Array.new(self.number_of_sequences).clear self.each_seq do |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) *************** *** 1139,1143 **** def output_molphy(options = {}) len = self.alignment_length ! header = "#{self.size} #{len}\n" sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } if options[:replace_space] --- 1139,1143 ---- def output_molphy(options = {}) len = self.alignment_length ! header = "#{self.number_of_sequences} #{len}\n" sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') } if options[:replace_space] *************** *** 1182,1192 **** include Output # Returns an array of sequence names. # The order of the names must be the same as # the order of each_seq. def sequence_names ! i = 0 ! self.each_seq { |s| i += 1 } ! (0...i).to_a end end #module EnumerableExtension --- 1182,1197 ---- include Output + # Returns number of sequences in this alignment. + def number_of_sequences + i = 0 + self.each_seq { |s| i += 1 } + i + end + # Returns an array of sequence names. # The order of the names must be the same as # the order of each_seq. def sequence_names ! 
(0...(self.number_of_sequences)).to_a end end #module EnumerableExtension *************** *** 1210,1213 **** --- 1215,1223 ---- each(&block) end + + # Returns number of sequences in this alignment. + def number_of_sequences + self.size + end end #module ArrayExtension *************** *** 1300,1303 **** --- 1310,1318 ---- end + # Returns number of sequences in this alignment. + def number_of_sequences + self.size + end + # Returns an array of sequence names. # The order of the names must be the same as *************** *** 1579,1582 **** --- 1594,1598 ---- @seqs.size end + alias number_of_sequences size # If the key exists, returns true. Otherwise, returns false. From ngoto at dev.open-bio.org Thu Dec 14 10:22:07 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 15:22:07 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/clustalw report.rb,1.10,1.11 Message-ID: <200612141522.kBEFM7ni013826@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/clustalw In directory dev.open-bio.org:/tmp/cvs-serv13804/lib/bio/appl/clustalw Modified Files: report.rb Log Message: Bio::(ClustalW|MAFFT)::Report#algin is deprecated. Instead, please use #alignment method. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw/report.rb,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** report.rb 30 Apr 2006 05:50:19 -0000 1.10 --- report.rb 14 Dec 2006 15:22:05 -0000 1.11 *************** *** 82,95 **** # Gets an multiple alignment. # Returns a Bio::Alignment object. ! def align do_parse() unless @align @align end ! alias alignment align # Gets an fasta-format string of the sequences. # Returns a string. def to_fasta(*arg) ! align.to_fasta(*arg) end --- 82,103 ---- # Gets an multiple alignment. # Returns a Bio::Alignment object. ! def alignment do_parse() unless @align @align end ! ! # This will be deprecated. 
Instead, please use alignment. ! # ! # Gets an multiple alignment. ! # Returns a Bio::Alignment object. ! def align ! warn "align method will be deprecated. Please use \'alignment\'." ! alignment ! end # Gets an fasta-format string of the sequences. # Returns a string. def to_fasta(*arg) ! alignment.to_fasta(*arg) end *************** *** 97,101 **** # Returns an array of Bio::FastaFormat objects. def to_a ! align.to_fastaformat_array end --- 105,109 ---- # Returns an array of Bio::FastaFormat objects. def to_a ! alignment.to_fastaformat_array end From ngoto at dev.open-bio.org Thu Dec 14 10:22:07 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 15:22:07 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/mafft report.rb,1.9,1.10 Message-ID: <200612141522.kBEFM7vN013829@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/mafft In directory dev.open-bio.org:/tmp/cvs-serv13804/lib/bio/appl/mafft Modified Files: report.rb Log Message: Bio::(ClustalW|MAFFT)::Report#algin is deprecated. Instead, please use #alignment method. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft/report.rb,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** report.rb 30 Apr 2006 05:50:19 -0000 1.9 --- report.rb 14 Dec 2006 15:22:05 -0000 1.10 *************** *** 67,76 **** # Gets an multiple alignment. ! # Returns an instance of Bio::Alignment class. ! def align do_parse() unless @align @align end ! alias alignment align # Gets an fasta-format string of the sequences. --- 67,84 ---- # Gets an multiple alignment. ! # Returns a Bio::Alignment object. ! def alignment do_parse() unless @align @align end ! ! # This will be deprecated. Instead, please use alignment. ! # ! # Gets an multiple alignment. ! # Returns a Bio::Alignment object. ! def align ! warn "align method will be deprecated. Please use \'alignment\'." ! alignment ! 
end # Gets an fasta-format string of the sequences. *************** *** 79,83 **** # Please refer to Bio::Alignment#to_fasta for arguments. def to_fasta(*arg) ! align.to_fasta(*arg) end --- 87,91 ---- # Please refer to Bio::Alignment#to_fasta for arguments. def to_fasta(*arg) ! alignment.to_fasta(*arg) end From ngoto at dev.open-bio.org Thu Dec 14 10:56:25 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 15:56:25 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb, 1.12, 1.13 mafft.rb, 1.12, 1.13 sim4.rb, 1.7, 1.8 Message-ID: <200612141556.kBEFuPd7014039@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory dev.open-bio.org:/tmp/cvs-serv14019/lib/bio/appl Modified Files: clustalw.rb mafft.rb sim4.rb Log Message: forgotten to change @option into @options Index: sim4.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sim4.rb,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** sim4.rb 14 Dec 2006 14:54:50 -0000 1.7 --- sim4.rb 14 Dec 2006 15:56:22 -0000 1.8 *************** *** 108,112 **** # If filename2 is not specified, using self.database. def exec_local(filename1, filename2 = nil) ! @command = [ @program, filename1, (filename2 or @database), *@option ] @output = nil @report = nil --- 108,112 ---- # If filename2 is not specified, using self.database. def exec_local(filename1, filename2 = nil) ! @command = [ @program, filename1, (filename2 or @database), *@options ] @output = nil @report = nil Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** clustalw.rb 14 Dec 2006 14:54:50 -0000 1.12 --- clustalw.rb 14 Dec 2006 15:56:22 -0000 1.13 *************** *** 83,87 **** query_align(seqs) else !
exec_local(@option) end end --- 83,87 ---- query_align(seqs) else ! exec_local(@options) end end *************** *** 135,139 **** ] opt << "-type=#{seqtype}" if seqtype ! opt.concat(@option) exec_local(opt) tf_out.open --- 135,139 ---- ] opt << "-type=#{seqtype}" if seqtype ! opt.concat(@options) exec_local(opt) tf_out.open Index: mafft.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft.rb,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** mafft.rb 14 Dec 2006 14:54:50 -0000 1.12 --- mafft.rb 14 Dec 2006 15:56:22 -0000 1.13 *************** *** 159,163 **** query_align(seqs) else ! exec_local(@option) end end --- 159,163 ---- query_align(seqs) else ! exec_local(@options) end end *************** *** 188,192 **** # Performs alignment of sequences in the file named +fn+. def query_by_filename(fn, seqtype = nil) ! opt = @option + [ fn ] exec_local(opt) @report = Report.new(@output, seqtype) --- 188,192 ---- # Performs alignment of sequences in the file named +fn+. def query_by_filename(fn, seqtype = nil) ! opt = @options + [ fn ] exec_local(opt) @report = Report.new(@output, seqtype) From ngoto at dev.open-bio.org Thu Dec 14 10:59:23 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 15:59:23 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb, 1.13, 1.14 mafft.rb, 1.13, 1.14 sim4.rb, 1.8, 1.9 Message-ID: <200612141559.kBEFxNqn014111@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory dev.open-bio.org:/tmp/cvs-serv14091/lib/bio/appl Modified Files: clustalw.rb mafft.rb sim4.rb Log Message: forgotten to change *@command to @command. 
Index: sim4.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sim4.rb,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** sim4.rb 14 Dec 2006 15:56:22 -0000 1.8 --- sim4.rb 14 Dec 2006 15:59:21 -0000 1.9 *************** *** 111,115 **** @output = nil @report = nil ! Bio::Command.call_command(*@command) do |io| io.close_write @output = io.read --- 111,115 ---- @output = nil @report = nil ! Bio::Command.call_command(@command) do |io| io.close_write @output = io.read Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** clustalw.rb 14 Dec 2006 15:56:22 -0000 1.13 --- clustalw.rb 14 Dec 2006 15:59:21 -0000 1.14 *************** *** 167,171 **** @log = nil ! Bio::Command.call_command(*@command) do |io| io.close_write @log = dout.read --- 167,171 ---- @log = nil ! Bio::Command.call_command(@command) do |io| io.close_write @log = dout.read Index: mafft.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft.rb,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** mafft.rb 14 Dec 2006 15:56:22 -0000 1.13 --- mafft.rb 14 Dec 2006 15:59:21 -0000 1.14 *************** *** 200,204 **** #STDERR.print "DEBUG: ", @command.join(" "), "\n" @output = nil ! Bio::Command.call_command(*@command) do |io| io.close_write ff = Bio::FlatFile.new(Bio::FastaFormat, io) --- 200,204 ---- #STDERR.print "DEBUG: ", @command.join(" "), "\n" @output = nil ! 
Bio::Command.call_command(@command) do |io| io.close_write ff = Bio::FlatFile.new(Bio::FastaFormat, io) From ngoto at dev.open-bio.org Thu Dec 14 11:04:04 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 16:04:04 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb,1.14,1.15 Message-ID: <200612141604.kBEG44Cx014165@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory dev.open-bio.org:/tmp/cvs-serv14145 Modified Files: clustalw.rb Log Message: forgotten mistakes (din was changed to io) Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** clustalw.rb 14 Dec 2006 15:59:21 -0000 1.14 --- clustalw.rb 14 Dec 2006 16:04:02 -0000 1.15 *************** *** 169,173 **** Bio::Command.call_command(@command) do |io| io.close_write ! @log = dout.read t.join end --- 169,173 ---- Bio::Command.call_command(@command) do |io| io.close_write ! @log = io.read t.join end From ngoto at dev.open-bio.org Thu Dec 14 11:06:01 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 16:06:01 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb,1.15,1.16 Message-ID: <200612141606.kBEG61eJ014214@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory dev.open-bio.org:/tmp/cvs-serv14194 Modified Files: clustalw.rb Log Message: changed to use output_fasta instead of to_fasta Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** clustalw.rb 14 Dec 2006 16:04:02 -0000 1.15 --- clustalw.rb 14 Dec 2006 16:05:59 -0000 1.16 *************** *** 102,106 **** break if seqtype end ! 
query_string(seqs.to_fasta(70, :avoid_same_name => true), seqtype) end --- 102,106 ---- break if seqtype end ! query_string(seqs.output_fasta(70, :avoid_same_name => true), seqtype) end From ngoto at dev.open-bio.org Thu Dec 14 11:08:48 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 16:08:48 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb, 1.16, 1.17 mafft.rb, 1.14, 1.15 Message-ID: <200612141608.kBEG8mYv014242@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory dev.open-bio.org:/tmp/cvs-serv14222 Modified Files: clustalw.rb mafft.rb Log Message: Changed to use output_fasta instead of to_fasta and options are changed. A mistake is fixed in clustalw.rb Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** clustalw.rb 14 Dec 2006 16:05:59 -0000 1.16 --- clustalw.rb 14 Dec 2006 16:08:46 -0000 1.17 *************** *** 102,106 **** break if seqtype end ! query_string(seqs.output_fasta(70, :avoid_same_name => true), seqtype) end --- 102,107 ---- break if seqtype end ! query_string(seqs.output_fasta(:width => 70, ! :avoid_same_name => true), seqtype) end *************** *** 170,174 **** io.close_write @log = io.read - t.join end @log --- 171,174 ---- Index: mafft.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft.rb,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** mafft.rb 14 Dec 2006 15:59:21 -0000 1.14 --- mafft.rb 14 Dec 2006 16:08:46 -0000 1.15 *************** *** 169,173 **** seqs = Bio::Alignment.new(seqs, *arg) end ! query_string(seqs.to_fasta(70)) end --- 169,173 ---- seqs = Bio::Alignment.new(seqs, *arg) end ! 
query_string(seqs.output_fasta(:width => 70)) end From ngoto at dev.open-bio.org Thu Dec 14 11:13:30 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 16:13:30 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/phylip - New directory Message-ID: <200612141613.kBEGDUlK014339@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/phylip In directory dev.open-bio.org:/tmp/cvs-serv14317/phylip Log Message: Directory /home/repository/bioruby/bioruby/lib/bio/appl/phylip added to the repository From ngoto at dev.open-bio.org Thu Dec 14 11:13:30 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 16:13:30 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/gcg - New directory Message-ID: <200612141613.kBEGDUfW014343@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/gcg In directory dev.open-bio.org:/tmp/cvs-serv14317/gcg Log Message: Directory /home/repository/bioruby/bioruby/lib/bio/appl/gcg added to the repository From nakao at dev.open-bio.org Thu Dec 14 11:19:25 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 14 Dec 2006 16:19:25 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/iprscan - New directory Message-ID: <200612141619.kBEGJPNB014391@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/iprscan In directory dev.open-bio.org:/tmp/cvs-serv14371/lib/bio/appl/iprscan Log Message: Directory /home/repository/bioruby/bioruby/lib/bio/appl/iprscan added to the repository From nakao at dev.open-bio.org Thu Dec 14 11:20:03 2006 From: nakao at dev.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu, 14 Dec 2006 16:20:03 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/iprscan - New directory Message-ID: <200612141620.kBEGK3Qd014439@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/iprscan In directory dev.open-bio.org:/tmp/cvs-serv14419/test/unit/bio/appl/iprscan Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/appl/iprscan added to the repository From nakao at dev.open-bio.org Thu Dec 14 11:20:27 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 14 Dec 2006 16:20:27 +0000 Subject: [BioRuby-cvs] bioruby/test/data/iprscan - New directory Message-ID: <200612141620.kBEGKRYP014523@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/iprscan In directory dev.open-bio.org:/tmp/cvs-serv14502/test/data/iprscan Log Message: Directory /home/repository/bioruby/bioruby/test/data/iprscan added to the repository From nakao at dev.open-bio.org Thu Dec 14 11:22:14 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 14 Dec 2006 16:22:14 +0000 Subject: [BioRuby-cvs] bioruby/test/data/iprscan merged.raw,NONE,1.1 Message-ID: <200612141622.kBEGMEvx014606@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/iprscan In directory dev.open-bio.org:/tmp/cvs-serv14577/test/data/iprscan Added Files: merged.raw Log Message: * Newly added files for InterProScan. 
--- NEW FILE: merged.raw --- Q9RHD9 D44DAE8C544CB7C1 267 HMMPfam PF00575 S1 1 55 3.3E-6 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMPfam PF00575 S1 68 142 4.1E-19 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMPfam PF00575 S1 155 228 1.8E-19 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMSmart SM00316 S1 3 55 7.1E-7 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMSmart SM00316 S1 70 142 8.1E-20 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 HMMSmart SM00316 S1 157 228 1.5E-21 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 ProfileScan PS50126 S1 1 55 14.869 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 ProfileScan PS50126 S1 72 142 20.809 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 ProfileScan PS50126 S1 159 228 22.541 T 11-Nov-2005 IPR003029 RNA binding S1 Molecular Function:RNA binding (GO:0003723) Q9RHD9 D44DAE8C544CB7C1 267 FPrintScan PR00681 RIBOSOMALS1 6 27 1.5E-17 T 11-Nov-2005 IPR000110 Ribosomal protein S1 Molecular Function:RNA binding (GO:0003723), Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Q9RHD9 D44DAE8C544CB7C1 267 FPrintScan PR00681 RIBOSOMALS1 85 104 1.5E-17 T 11-Nov-2005 IPR000110 Ribosomal protein S1 Molecular Function:RNA binding (GO:0003723), Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Q9RHD9 
D44DAE8C544CB7C1 267 FPrintScan PR00681 RIBOSOMALS1 125 143 1.5E-17 T 11-Nov-2005 IPR000110 Ribosomal protein S1 Molecular Function:RNA binding (GO:0003723), Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Q9RHD9 D44DAE8C544CB7C1 267 superfamily SSF50249 Nucleic_acid_OB 3 60 1.4E-7 T 11-Nov-2005 IPR008994 Nucleic acid-binding OB-fold Molecular Function:nucleic acid binding (GO:0003676) Q9RHD9 D44DAE8C544CB7C1 267 superfamily SSF50249 Nucleic_acid_OB 61 205 6.3999999999999995E-24 T 11-Nov-2005 IPR008994 Nucleic acid-binding OB-fold Molecular Function:nucleic acid binding (GO:0003676) RS16_ECOLI F94D07049A6D489D 82 HMMTigr TIGR00002 S16 2 81 117.16 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 superfamily SSF54565 Ribosomal_S16 1 79 1.81E-8 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 HMMPfam PF00886 Ribosomal_S16 8 68 2.7000000000000004E-33 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 BlastProDom PD003791 Ribosomal_S16 10 77 4.0E-33 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological 
Process:protein biosynthesis (GO:0006412) RS16_ECOLI F94D07049A6D489D 82 ProfileScan PS00732 RIBOSOMAL_S16 2 11 8.0E-5 T 11-Nov-2005 IPR000307 Ribosomal protein S16 Molecular Function:structural constituent of ribosome (GO:0003735), Cellular Component:intracellular (GO:0005622), Cellular Component:ribosome (GO:0005840), Biological Process:protein biosynthesis (GO:0006412) Y902_MYCTU CD84A335CCFFE6D7 446 superfamily SSF47384 His_kin_homodim 220 292 5.89E-7 T 11-Nov-2005 IPR009082 Histidine kinase, homodimeric Y902_MYCTU CD84A335CCFFE6D7 446 HMMSmart SM00304 HAMP 170 222 1.8E-6 T 11-Nov-2005 IPR003660 Histidine kinase, HAMP region Molecular Function:signal transducer activity (GO:0004871), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 ProfileScan PS50885 HAMP 170 222 7.777 T 11-Nov-2005 IPR003660 Histidine kinase, HAMP region Molecular Function:signal transducer activity (GO:0004871), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 HMMPfam PF00672 HAMP 151 219 1.1E-8 T 11-Nov-2005 IPR003660 Histidine kinase, HAMP region Molecular Function:signal transducer activity (GO:0004871), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 ProfileScan PS50109 HIS_KIN 237 446 34.449 T 11-Nov-2005 IPR005467 Histidine kinase Biological Process:protein amino acid phosphorylation (GO:0006468), Molecular Function:kinase activity (GO:0016301) Y902_MYCTU CD84A335CCFFE6D7 446 HMMSmart SM00388 HisKA 230 296 1.4E-12 T 11-Nov-2005 IPR003661 Histidine kinase A, N-terminal Molecular Function:two-component sensor molecule activity (GO:0000155), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 HMMPfam PF00512 HisKA 230 296 2.4E-11 T 11-Nov-2005 IPR003661 Histidine kinase A, N-terminal 
Molecular Function:two-component sensor molecule activity (GO:0000155), Biological Process:signal transduction (GO:0007165), Cellular Component:membrane (GO:0016020) Y902_MYCTU CD84A335CCFFE6D7 446 HMMSmart SM00387 HATPase_c 338 446 2.9E-24 T 11-Nov-2005 IPR003594 ATP-binding region, ATPase-like Molecular Function:ATP binding (GO:0005524) Y902_MYCTU CD84A335CCFFE6D7 446 HMMPfam PF02518 HATPase_c 338 445 2.5E-26 T 11-Nov-2005 IPR003594 ATP-binding region, ATPase-like Molecular Function:ATP binding (GO:0005524) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 374 388 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 392 402 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 406 424 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) Y902_MYCTU CD84A335CCFFE6D7 446 FPrintScan PR00344 BCTRLSENSOR 430 443 2.0E-12 T 11-Nov-2005 IPR004358 Histidine kinase related protein, C-terminal Biological Process:phosphorylation (GO:0016310), Molecular Function:transferase activity, transferring phosphorus-containing groups (GO:0016772) From nakao at dev.open-bio.org Thu Dec 14 11:22:14 2006 From: nakao at dev.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu, 14 Dec 2006 16:22:14 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/iprscan test_report.rb, NONE, 1.1 Message-ID: <200612141622.kBEGMEgZ014610@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/iprscan In directory dev.open-bio.org:/tmp/cvs-serv14577/test/unit/bio/appl/iprscan Added Files: test_report.rb Log Message: * Newly added files for InterProScan. --- NEW FILE: test_report.rb --- # # test/unit/bio/appl/iprscan/test_report.rb - Unit test for Bio::InterProScan::Report # # Copyright (C) 2006 Mitsuteru Nakao # # $Id: test_report.rb,v 1.1 2006/12/14 16:22:12 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/appl/iprscan/report' module Bio class TestIprscanData bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), [".."] * 5)).cleanpath.to_s TestDataIprscan = Pathname.new(File.join(bioruby_root, "test", "data", "iprscan")).cleanpath.to_s def self.raw_format File.open(File.join(TestDataIprscan, "merged.raw")) end end class TestIprscanTxtReport < Test::Unit::TestCase def setup test_entry=<<-END slr0002\t860 InterPro\tIPR001264\tGlycosyl transferase, family 51 BlastProDom\tPD001895\tsp_Q55683_SYNY3_Q55683\t2e-37\t292-370 HMMPfam\tPF00912\tTransglycosyl\t8e-104\t204-372 InterPro\tIPR001460\tPenicillin-binding protein, transpeptidase domain HMMPfam\tPF00905\tTranspeptidase\t5.7e-30\t451-742 InterPro\tNULL\tNULL ProfileScan\tPS50310\tALA_RICH\t10.224\t805-856 // END @obj = Bio::Iprscan::Report.parse_in_txt(test_entry) end def test_query_id assert_equal('slr0002', @obj.query_id) end def test_query_length assert_equal(860, @obj.query_length) end def test_matches_size assert_equal(4, @obj.matches.size) end def test_match_ipr_id assert_equal('IPR001264', @obj.matches.first.ipr_id) end def test_match_ipr_description assert_equal('Glycosyl transferase, family 
51', @obj.matches.first.ipr_description) end def test_match_method assert_equal('BlastProDom', @obj.matches.first.method) end def test_match_accession assert_equal('PD001895', @obj.matches.first.accession) end def test_match_description assert_equal('sp_Q55683_SYNY3_Q55683', @obj.matches.first.description) end def test_match_evalue assert_equal('2e-37', @obj.matches.first.evalue) end def test_match_match_start assert_equal(292, @obj.matches.first.match_start) end def test_match_match_end assert_equal(370, @obj.matches.first.match_end) end end # TestIprscanTxtReport class TestIprscanRawReport < Test::Unit::TestCase def setup test_raw = Bio::TestIprscanData.raw_format entry = '' @obj = [] while line = test_raw.gets if entry != '' and entry.split("\t").first == line.split("\t").first entry << line elsif entry != '' @obj << Bio::Iprscan::Report.parse_in_raw(entry) entry = line else entry << line end end end def test_obj assert_equal(2, @obj.size) end def test_query_id assert_equal('Q9RHD9', @obj.first.query_id) end def test_entry_id assert_equal('Q9RHD9', @obj.first.entry_id) end def test_query_length assert_equal(267, @obj.first.query_length) end def test_match_query_id assert_equal('Q9RHD9', @obj.first.matches.first.query_id) end def test_match_crc64 assert_equal('D44DAE8C544CB7C1', @obj.first.matches.first.crc64) end def test_match_query_length assert_equal(267, @obj.first.matches.first.query_length) end def test_match_method assert_equal('HMMPfam', @obj.first.matches.first.method) end def test_match_accession assert_equal('PF00575', @obj.first.matches.first.accession) end def test_match_description assert_equal('S1', @obj.first.matches.first.description) end def test_match_match_start assert_equal(1, @obj.first.matches.first.match_start) end def test_match_match_end assert_equal(55, @obj.first.matches.first.match_end) end def test_match_evalue assert_equal('3.3E-6', @obj.first.matches.first.evalue) end def test_match_status assert_equal('T', 
@obj.first.matches.first.status) end def test_match_date assert_equal('11-Nov-2005', @obj.first.matches.first.date) end def test_match_ipr_id assert_equal('IPR003029', @obj.first.matches.first.ipr_id) end def test_match_ipr_description assert_equal('RNA binding S1', @obj.first.matches.first.ipr_description) end def test_match_go_terms assert_equal(["Molecular Function:RNA binding (GO:0003723)"], @obj.first.matches.first.go_terms) end end end From nakao at dev.open-bio.org Thu Dec 14 11:22:14 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 14 Dec 2006 16:22:14 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/iprscan report.rb,NONE,1.1 Message-ID: <200612141622.kBEGMEmb014615@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/iprscan In directory dev.open-bio.org:/tmp/cvs-serv14577/lib/bio/appl/iprscan Added Files: report.rb Log Message: * Newly added files for InterProScan. --- NEW FILE: report.rb --- # # = bio/appl/iprscan/report.rb - a class for iprscan output. # # Copyright:: Copyright (C) 2006 # Mitsuteru C. Nakao # License:: Ruby's # # $Id: report.rb,v 1.1 2006/12/14 16:22:12 nakao Exp $ # # == Report classes for the iprscan program. # module Bio class Iprscan # = DESCRIPTION # Class for InterProScan report. It is used to parse results and reformat # results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format. # # See ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html # # == USAGE # # Read a marged.txt and split each entry. # Bio::Iprscan::Report.reports_in_txt(File.read("marged.txt") do |report| # report.query_id # report.matches.size # report.matches.each do |match| # match.ipr_id #=> 'IPR...' 
#     match.ipr_description
#     match.method
#     match.accession
#     match.description
#     match.match_start
#     match.match_end
#     match.evalue
#   end
#   # report.to_gff3
#   # report.to_html
# end
#
# Bio::Iprscan::Report.reports_in_raw(File.open("merged.raw")) do |report|
#   report.class #=> Bio::Iprscan::Report
# end
#
class Report

  # Entry delimiter pattern.
  RS = DELIMITER = "\n\/\/\n"

  # Query sequence name (entry_id).
  attr_accessor :query_id
  alias :entry_id :query_id

  # Query sequence length.
  attr_accessor :query_length

  # Array of Match objects, in the order they were read from the input.
  # Each Match exposes the fields stored by the parser that built it
  # (e.g. method, accession, description, match_start, match_end, evalue,
  # ipr_id, ipr_description).
  attr_accessor :matches

  # Splits a raw formatted stream into entries and yields one Report for
  # each run of consecutive lines sharing the same first tab-separated
  # field (the query id).  Blank lines are ignored.  The final entry is
  # yielded when the stream is exhausted.
  #
  # +io+ only needs to respond to +gets+.
  #
  # == USAGE
  #  Bio::Iprscan::Report.reports_in_raw(File.open("merged.raw")) do |report|
  #    report
  #  end
  #
  def self.reports_in_raw(io)
    buffer = []
    current_id = nil
    while (line = io.gets)
      next if line.strip.empty?
      id = line.split("\t").first
      if !buffer.empty? && id != current_id
        yield parse_in_raw(buffer.join)
        buffer = []
      end
      current_id = id
      buffer << line
    end
    # Without this flush the last query in the stream would be dropped.
    yield parse_in_raw(buffer.join) unless buffer.empty?
  end

  # Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report
  # object.
  #
  # Each non-blank line is split on tabs into: query id, CRC64, query
  # length, method, accession, description, match start, match end,
  # e-value, status, date and, optionally, InterPro id, InterPro
  # description and GO terms (separated by ', ').
  #
  # query_id and query_length are taken from the first match; they stay
  # nil when +str+ contains no match lines.
  def self.parse_in_raw(str)
    report = new
    str.split(/\n/).each do |line|
      fields = line.split("\t")
      next if fields.empty?
      attrs = {
        :query_id     => fields[0],
        :crc64        => fields[1],
        :query_length => fields[2].to_i,
        :method       => fields[3],
        :accession    => fields[4],
        :description  => fields[5],
        :match_start  => fields[6].to_i,
        :match_end    => fields[7].to_i,
        :evalue       => fields[8],
        :status       => fields[9],
        :date         => fields[10]
      }
      if fields[11]
        attrs[:ipr_id] = fields[11]
        attrs[:ipr_description] = fields[12]
      end
      attrs[:go_terms] = fields[13].split(', ') if fields[13]
      report.matches << Match.new(attrs)
    end
    first = report.matches.first
    if first
      report.query_id = first.query_id
      report.query_length = first.query_length
    end
    report
  end

  # Parser method for a xml formatted entry.  Not implemented yet: always
  # raises NotImplementedError.
  def self.parse_in_xml(str)
    raise NotImplementedError, "#{name}.parse_in_xml is not implemented"
  end

  # Splits entry stream on the entry delimiter and yields one Report per
  # non-blank entry.
  #
  # == Usage
  #  Bio::Iprscan::Report.reports_in_txt(File.open("merged.txt")) do |report|
  #    report
  #  end
  def self.reports_in_txt(io)
    # IO#each needs a String separator; a Regexp raises TypeError.
    io.each(DELIMITER) do |entry|
      next if entry.strip.empty?
      yield parse_in_txt(entry)
    end
  end

  # Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report
  # object.
  #
  # Tab-separated lines are interpreted as follows:
  # * exactly two fields: query id and query length
  # * first field '//': entry terminator, ignored
  # * first field 'InterPro': InterPro id and description applied to the
  #   match lines that follow
  # * otherwise: method, accession, description, e-value and a
  #   "start-end" location; lines without a location field are skipped
  #
  # == Usage
  #
  #  File.read("merged.txt").each(Bio::Iprscan::Report::RS) do |e|
  #    report = Bio::Iprscan::Report.parse_in_txt(e)
  #  end
  #
  def self.parse_in_txt(str)
    report = new
    ipr_fields = []
    str.split(/\n/).each do |line|
      fields = line.split("\t")
      if fields.size == 2
        report.query_id = fields[0]
        report.query_length = fields[1].to_i
      elsif fields.first == '//'
        # entry terminator: nothing to record
      elsif fields.first == 'InterPro'
        ipr_fields = fields
      elsif fields[4]
        startp, endp = fields[4].split('-')
        report.matches << Match.new(:ipr_id => ipr_fields[1],
                                    :ipr_description => ipr_fields[2],
                                    :method => fields[0],
                                    :accession => fields[1],
                                    :description => fields[2],
                                    :evalue => fields[3],
                                    :match_start => startp.to_i,
                                    :match_end => endp.to_i)
      end
    end
    report
  end

  # Creates an empty report: no query id, no query length, no matches.
  def initialize
    @query_id = nil
    @query_length = nil
    @matches = []
  end

  # Output in html format.  Not implemented yet.
  def to_html
    raise NotImplementedError, "#{self.class}#to_html is not implemented"
  end

  # Output in xml format.  Not implemented yet.
  def to_xml
    raise NotImplementedError, "#{self.class}#to_xml is not implemented"
  end

  # Output in ebihtml format.  Not implemented yet.
  def to_ebihtml
    raise NotImplementedError, "#{self.class}#to_ebihtml is not implemented"
  end

  # Output in txt format.  Not implemented yet.
  def to_txt
    raise NotImplementedError, "#{self.class}#to_txt is not implemented"
  end

  # Output in raw format.  Not implemented yet.
  def to_raw
    raise NotImplementedError, "#{self.class}#to_raw is not implemented"
  end

  # Output in gff3 format.  Not implemented yet.
  def to_gff3
    raise NotImplementedError, "#{self.class}#to_gff3 is not implemented"
  end

  # == DESCRIPTION
  # Container class for InterProScan matches.  Values are kept in a Hash
  # keyed by Symbol; fields without an explicit reader below are reached
  # through method_missing.
  #
  # == USAGE
  #  match = Match.new(:query_id => ...)
  #
  #  match.ipr_id = 'IPR001234'
  #  match.ipr_id #=> 'IPR001234'
  #
  class Match
    # Builds a match from +hash+; keys are converted to Symbols.
    def initialize(hash)
      @data = {}
      hash.each { |key, value| @data[key.to_sym] = value }
    end

    # Date for computation.
    def date; @data[:date]; end
    # CRC64 checksum of query sequence.
    def crc64; @data[:crc64]; end
    # E-value of the match
    def evalue; @data[:evalue]; end
    # Status of the match (T for true / M for marginal).
    def status; @data[:status]; end
    # the corresponding InterPro entry (if any).
    def ipr_id; @data[:ipr_id]; end
    # the length of the sequence in AA (reads the :length key).
    def length; @data[:length]; end
    # the length of the query sequence (reads the :query_length key).
    def query_length; @data[:query_length]; end
    # the analysis method launched.
    # NOTE: Object#method is overridden by Match#method.
    def method; @data[:method]; end
    # the Gene Ontology description for the InterPro entry, in "Aspect:term (ID)" format.
    def go_terms; @data[:go_terms]; end
    # Id of the input sequence.
    def query_id; @data[:query_id]; end
    # the end of the domain match.
    def match_end; @data[:match_end]; end
    # the database members entry for this match.
    def accession; @data[:accession]; end
    # the database members description for this match.
    def description; @data[:description]; end
    # the start of the domain match.
    def match_start; @data[:match_start]; end
    # the description of the InterPro entry.
    def ipr_description; @data[:ipr_description]; end

    # Generic accessor for fields without an explicit reader.
    #
    # * <tt>match.foo = value</tt> stores +value+ under :foo, including
    #   nil and false.
    # * <tt>match.foo(value)</tt> with a truthy value also stores it
    #   (kept for backward compatibility).
    # * <tt>match.foo</tt> returns the stored value or nil.
    def method_missing(name, arg = nil)
      key = name.to_s
      if key =~ /=\z/
        @data[key.sub(/=\z/, '').to_sym] = arg
      elsif arg
        @data[name.to_sym] = arg
      else
        @data[name.to_sym]
      end
    end

    # Reports setters and already stored keys as supported, so that
    # respond_to? agrees with method_missing for them.
    def respond_to_missing?(name, include_private = false)
      key = name.to_s
      return true if key =~ /=\z/
      @data.key?(key.to_sym) || super
    end
  end # class Match
end # class Report
=begin
end # class Iprscan end # module Bio From nakao at dev.open-bio.org Thu Dec 14 11:42:38 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 14 Dec 2006 16:42:38 +0000 Subject: [BioRuby-cvs] bioruby ChangeLog,1.54,1.55 Message-ID: <200612141642.kBEGgckx014692@dev.open-bio.org> Update of /home/repository/bioruby/bioruby In directory dev.open-bio.org:/tmp/cvs-serv14672 Modified Files: ChangeLog Log Message: * lib/bio/appl/iprscan/report.rb Newly added. Index: ChangeLog =================================================================== RCS file: /home/repository/bioruby/bioruby/ChangeLog,v retrieving revision 1.54 retrieving revision 1.55 diff -C2 -d -r1.54 -r1.55 *** ChangeLog 6 Oct 2006 09:53:38 -0000 1.54 --- ChangeLog 14 Dec 2006 16:42:36 -0000 1.55 *************** *** 1,2 **** --- 1,8 ---- + 2006-12-15 Mitsuteru Nakao + + * lib/bio/appl/iprscan/report.rb + + Bio::Iprscan::Report for InterProScan output is newly added.
=end
+ 2006-10-05 Naohisa Goto From ngoto at dev.open-bio.org Thu Dec 14 14:52:55 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 19:52:55 +0000 Subject: [BioRuby-cvs] bioruby/lib bio.rb,1.72,1.73 Message-ID: <200612141952.kBEJqtim015845@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib In directory dev.open-bio.org:/tmp/cvs-serv15819/lib Modified Files: bio.rb Log Message: New files/classes: Bio::GCG::Msf in lib/bio/appl/gcg/msf.rb for GCG msf multiple sequence alignment format parser, and Bio::GCG::Seq in lib/bio/appl/gcg/seq.rb for GCG sequence format parser. Autoload of the classes (in bio.rb) and file format autodetection (in flatfile.rb) are also supported. Bio::Alignment::Output#output_msf, #output(:msf, ...) are added to generate msf formatted string from multiple alignment object. Index: bio.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio.rb,v retrieving revision 1.72 retrieving revision 1.73 diff -C2 -d -r1.72 -r1.73 *** bio.rb 13 Dec 2006 16:29:36 -0000 1.72 --- bio.rb 14 Dec 2006 19:52:53 -0000 1.73 *************** *** 231,234 **** --- 231,238 ---- autoload :Blat, 'bio/appl/blat/report' + module GCG + autoload :Msf, 'bio/appl/gcg/msf' + autoload :Seq, 'bio/appl/gcg/seq' + end ### Utilities From ngoto at dev.open-bio.org Thu Dec 14 14:52:55 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 19:52:55 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio alignment.rb,1.20,1.21 Message-ID: <200612141952.kBEJqtEn015850@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv15819/lib/bio Modified Files: alignment.rb Log Message: New files/classes: Bio::GCG::Msf in lib/bio/appl/gcg/msf.rb for GCG msf multiple sequence alignment format parser, and Bio::GCG::Seq in lib/bio/appl/gcg/seq.rb for GCG sequence format parser. 
Autoload of the classes (in bio.rb) and file format autodetection (in flatfile.rb) are also supported. Bio::Alignment::Output#output_msf, #output(:msf, ...) are added to generate msf formatted string from multiple alignment object. Index: alignment.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/alignment.rb,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** alignment.rb 14 Dec 2006 15:08:59 -0000 1.20 --- alignment.rb 14 Dec 2006 19:52:53 -0000 1.21 *************** *** 24,27 **** --- 24,32 ---- require 'bio/sequence' + #--- + # (depends on autoload) + #require 'bio/appl/gcg/seq' + #+++ + module Bio *************** *** 871,874 **** --- 876,881 ---- when :phylipnon output_phylipnon(*arg) + when :msf + output_msf(*arg) when :molphy output_molphy(*arg) *************** *** 1177,1180 **** --- 1184,1305 ---- aseqs.join('') end + + # Generates msf formatted text as a string + def output_msf(options = {}) + len = self.seq_length + + if !options.has_key?(:avoid_same_name) or options[:avoid_same_name] + sn = __clustal_avoid_same_name(self.sequence_names) + else + sn = self.sequence_names.collect do |x| + x.to_s.gsub(/[\r\n\x00]/, ' ') + end + end + if !options.has_key?(:replace_space) or options[:replace_space] + sn.collect! { |x| x.gsub(/\s/, '_') } + end + if !options.has_key?(:escape) or options[:escape] + sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } + end + if !options.has_key?(:split) or options[:split] + sn.collect! 
{ |x| x.split(/\s/)[0].to_s } + end + + seqwidth = 50 + namewidth = [31, sn.collect { |x| x.length }.max ].min + sep = ' ' * 2 + + seqregexp = Regexp.new("(.{1,#{seqwidth}})") + gchar = (options[:gap_char] or '.') + pchar = (options[:padding_char] or '~') + + aseqs = Array.new(self.number_of_sequences).clear + self.each_seq do |s| + aseqs << s.to_s.gsub(self.gap_regexp, gchar) + end + aseqs.each do |s| + s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length } + s.sub!(/#{Regexp.escape(gchar)}+\z/, '') + s << (pchar * (len - s.length)) + end + + case options[:case].to_s + when /lower/i + aseqs.each { |s| s.downcase! } + when /upper/i + aseqs.each { |s| s.upcase! } + else #default upcase + aseqs.each { |s| s.upcase! } + end + + case options[:type].to_s + when /protein/i, /aa/i + amino = true + when /na/i + amino = false + else + if seqclass == Bio::Sequence::AA then + amino = true + elsif seqclass == Bio::Sequence::NA then + amino = false + else + # if we can't determine, we asuume as protein. + amino = aseqs.size + aseqs.each { |x| amino -= 1 if /\A[acgt]\z/i =~ x } + amino = false if amino <= 0 + end + end + + seq_type = (amino ? 'P' : 'N') + + fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf') + dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M') + + sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) } + #sums = aseqs.collect { |s| 0 } + sum = 0; sums.each { |x| sum += x }; sum %= 10000 + msf = + [ + "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n", + "\n", + "\n", + " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n", + "\n" + ] + + sn.each do |snx| + msf << ' Name: ' + + sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] + + " Len: #{len} Check: #{sums.shift} Weight: 1.00\n" + end + msf << "\n//\n" + + aseqs.collect! 
do |s| + snx = sn.shift + head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep + s.gsub!(seqregexp, "\\1\n") + a = s.split(/^/) + a.collect { |x| head + x } + end + lines = (len + seqwidth - 1).div(seqwidth) + i = 1 + lines.times do + msf << "\n" + n_l = i + n_r = [ i + seqwidth - 1, len ].min + if n_l != n_r then + w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max + msf << (' ' * namewidth + sep + n_l.to_s + + ' ' * w + n_r.to_s + "\n") + else + msf << (' ' * namewidth + sep + n_l.to_s + "\n") + end + aseqs.each { |a| msf << a.shift } + i += seqwidth + end + msf << "\n" + msf.join('') + end + end #module Output From ngoto at dev.open-bio.org Thu Dec 14 14:52:55 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 14 Dec 2006 19:52:55 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/gcg msf.rb, NONE, 1.1 seq.rb, NONE, 1.1 Message-ID: <200612141952.kBEJqtQA015855@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/gcg In directory dev.open-bio.org:/tmp/cvs-serv15819/lib/bio/appl/gcg Added Files: msf.rb seq.rb Log Message: New files/classes: Bio::GCG::Msf in lib/bio/appl/gcg/msf.rb for GCG msf multiple sequence alignment format parser, and Bio::GCG::Seq in lib/bio/appl/gcg/seq.rb for GCG sequence format parser. Autoload of the classes (in bio.rb) and file format autodetection (in flatfile.rb) are also supported. Bio::Alignment::Output#output_msf, #output(:msf, ...) are added to generate msf formatted string from multiple alignment object. --- NEW FILE: msf.rb --- # # = bio/appl/gcg/msf.rb - GCG multiple sequence alignment (.msf) parser class # # Copyright:: Copyright (C) 2003, 2006 # Naohisa Goto # License:: Ruby's # # $Id: msf.rb,v 1.1 2006/12/14 19:52:53 ngoto Exp $ # # = About Bio::GCG::Msf # # Please refer document of Bio::GCG::Msf. # #--- # (depends on autoload) #require 'bio/appl/gcg/seq' #+++ module Bio module GCG # The msf is a multiple sequence alignment format developed by Wisconsin. 
# Bio::GCG::Msf is a msf format parser. class Msf #< DB # delimiter used by Bio::FlatFile DELIMITER = RS = nil # Creates a new Msf object. def initialize(str) str = str.sub(/\A[\r\n]+/, '') if /^\!\![A-Z]+\_MULTIPLE\_ALIGNMNENT/ =~ str[/.*/] then @heading = str[/.*/] # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this str.sub!(/.*/, '') end str.sub!(/.*\.\.$/m, '') @description = $&.to_s.sub(/^.*\.\.$/, '').to_s d = $&.to_s if m = /(.+)\s+MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(d) then @entry_id = m[1].to_s.strip @length = (m[2] ? m[2].to_i : nil) @seq_type = m[3] @date = m[4].to_s.strip @checksum = (m[6] ? m[6].to_i : nil) end str.sub!(/.*\/\/$/m, '') a = $&.to_s.split(/^/) @seq_info = [] a.each do |x| if /Name\: / =~ x then s = {} x.scan(/(\S+)\: +(\S*)/) { |y| s[$1] = $2 } @seq_info << s end end @data = str @description.sub!(/\A(\r\n|\r|\n)/, '') @align = nil end # description attr_reader :description # ID of the alignment attr_reader :entry_id # alignment length attr_reader :length # sequence type ("N" for DNA/RNA or "P" for protein) attr_reader :seq_type # date attr_reader :date # checksum attr_reader :checksum # heading # ('!!NA_MULTIPLE_ALIGNMENT 1.0' or whatever like this) attr_reader :heading #--- ## data (internally used, will be obsoleted) #attr_reader :data # ## seq. info. 
(internally used, will be obsoleted) #attr_reader :seq_info #+++ # symbol comparison table def symbol_comparison_table unless defined?(@symbol_comparison_table) /Symbol comparison table\: +(\S+)/ =~ @description @symbol_comparison_table = $1 end @symbol_comparison_table end # gap weight def gap_weight unless defined?(@gap_weight) /GapWeight\: +(\S+)/ =~ @description @gap_weight = $1 end @gap_weight end # gap length weight def gap_length_weight unless defined?(@gap_length_weight) /GapLengthWeight\: +(\S+)/ =~ @description @gap_length_weight = $1 end @gap_length_weight end # CompCheck field def compcheck unless defined?(@compcheck) if /CompCheck\: +(\d+)/ =~ @description then @compcheck = $1.to_i else @compcheck = nil end end @compcheck end # parsing def do_parse return if @align a = @data.strip.split(/\n\n/) @seq_data = Array.new(@seq_info.size) @seq_data.collect! { |x| Array.new } a.each do |x| b = x.split(/\n/) nw = 0 if b.size > @seq_info.size then if /^ +/ =~ b.shift.to_s nw = $&.to_s.length end end if nw > 0 then b.each_with_ind