X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fclassify_taxonomy;h=64303fb6b582432ae052b2ceebb36684c3443f84;hb=48bea5c28b89dc5586d0bddb338ccd6ba23aa1f9;hp=7a9eae49f0c4b48a4de42534aa77be75b465e2c9;hpb=4eed3b661f80010e9dd2936951c747ae4d7382a9;p=biopieces.git diff --git a/bp_bin/classify_taxonomy b/bp_bin/classify_taxonomy index 7a9eae4..64303fb 100755 --- a/bp_bin/classify_taxonomy +++ b/bp_bin/classify_taxonomy @@ -49,7 +49,7 @@ class TaxNode def add_gg(s_id, score, size) node = self - s_id.scan(/ ([\w])__([^;]+)/) do + s_id.scan(/([\w])__([^;]+)/) do level = expand_level($1) name = $2 @@ -68,13 +68,13 @@ class TaxNode def merge(node_new, node_old = self) node_new.children.each do |name, child| if node_old.children[name] - node_old.count += node_new.count - node_old.score += node_new.score + node_old.children[name].count += child.count + node_old.children[name].score += child.score + + merge(child, node_old.children[name]) else node_old.children[name] = child end - - merge(child, node_old.children[name]) end end @@ -89,13 +89,29 @@ class TaxNode list end + # Method to recursively remove branches in taxonomic tree where the child count is less than or + # equal to a given minimum. + def debranch(min_count = nil) + node = self + + node.children.each do |name, child| + node.children.delete(name) if child.count <= min_count + end + + node.children.each_value do |child| + child.debranch(min_count) + end + end + # Method to recursively trim a taxonomic tree so that it only contains an unbranched tree, # which gives the lowest common ancestor. - def lowest_common_ancestor(node = self) + def lowest_common_ancestor + node = self + node.children = {} if node.children.size > 1 - node.children.each do |name, child| - lowest_common_ancestor(child) + node.children.each_value do |child| + child.lowest_common_ancestor end end @@ -142,8 +158,9 @@ class TaxNode end casts = [] -casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'size', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'size', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'min_count', :short=>'m', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) @@ -176,6 +193,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| if options[:LCA] tax_hash.each_value do |tree| + tree.debranch(options[:min_count]) if options[:min_count] tree.lowest_common_ancestor end