From d68f8c54f74474676571d74a8309f7337fdb51d5 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 20 Jun 2012 11:19:35 +0000 Subject: [PATCH] classify_taxonomy work git-svn-id: http://biopieces.googlecode.com/svn/trunk@1843 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/classify_taxonomy | 51 +++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/bp_bin/classify_taxonomy b/bp_bin/classify_taxonomy index 45da037..1614e5d 100755 --- a/bp_bin/classify_taxonomy +++ b/bp_bin/classify_taxonomy @@ -33,11 +33,11 @@ require 'maasha/biopieces' class TaxTree def initialize - @tree = TaxNode.new("root", "Root", 0, 0.0) + @tree = TaxNode.new("root", "Root", 0, 0, 0.0) end # Method to add to the TaxTree a GreenGenes entry. - def add_gg(s_id, score) + def add_gg(s_id, score, size) node = @tree s_id.scan(/ ([\w])__([^;]+)/) do @@ -45,7 +45,7 @@ class TaxTree name = $2 if node.children[name].nil? - node.children[name] = TaxNode.new(level, name, 1, score) + node.children[name] = TaxNode.new(level, name, 1, size, score) else node.children[name].count += 1 node.children[name].score += score @@ -69,6 +69,7 @@ class TaxTree else node.children[new_node.name].count += new_node.count node.children[new_node.name].score += new_node.score + node.children[new_node.name].size += new_node.size end node = node.children[new_node.name] @@ -76,7 +77,7 @@ class TaxTree end def flatten(node = @tree, list = []) - list << TaxNode.new(node.level, node.name, node.count, node.score) + list << TaxNode.new(node.level, node.name, node.count, node.size, node.score) node.children.each do |name, child| list = flatten(child, list.dup) @@ -101,26 +102,6 @@ class TaxTree self end - def dump(node = @tree) - indent = 0 - - case node.level - when "kingdom" then indent = 2 - when "phylum" then indent = 4 - when "class" then indent = 6 - when "order" then indent = 8 - when "family" then indent = 10 - when "genus" then indent = 12 - when "species" then indent = 14 - end - - puts (" " * indent) + "#{node.name} (#{node.level})" - - node.children.each do |name, child| - dump(child) - end - end - private def expand_level(level) @@ -141,12 +122,13 @@ class TaxTree end class TaxNode - attr_accessor :level, :name, :count, :score, :children + attr_accessor :level, :name, :count, :size, :score, :children - def initialize(level, name, count, score) + def initialize(level, name, count, size, score) @level = level @name = name @count = count + @size = size @score = score @children = {} end @@ -156,7 +138,7 @@ class TaxTree record[:REC_TYPE] = "Classification" record[:LEVEL] = @level record[:NAME] = @name - record[:COUNT] = @count + record[:COUNT] = @size record[:SCORE] = @count == 0 ? 0.0 : (@score / @count).round(2) record @@ -165,7 +147,8 @@ class TaxTree end casts = [] -casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'size', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) @@ -175,8 +158,18 @@ tax_hash = {} Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| if record[:Q_ID] and record[:S_ID] and record[:SCORE] + size = 1 + + if options[:size] + if record[:Q_ID].match(/_(\d+)$/) + size = $1.to_i + else + raise BiopiecesError, "Could not extract size from Q_ID: #{record[:Q_ID]}" + end + end + tax_hash[record[:Q_ID]] = TaxTree.new unless tax_hash[record[:Q_ID]] - tax_hash[record[:Q_ID]].add_gg(record[:S_ID], record[:SCORE].to_f) + tax_hash[record[:Q_ID]].add_gg(record[:S_ID], record[:SCORE].to_f, size) else output.puts record end -- 2.39.5