class TaxTree
def initialize
- @tree = TaxNode.new("root", "Root", 0, 0.0)
+ @tree = TaxNode.new("root", "Root", 0, 0, 0.0)
end
# Method to add a GreenGenes entry to the TaxTree.
- def add_gg(s_id, score)
+ def add_gg(s_id, score, size)
node = @tree
s_id.scan(/ ([\w])__([^;]+)/) do
name = $2
if node.children[name].nil?
- node.children[name] = TaxNode.new(level, name, 1, score)
+ node.children[name] = TaxNode.new(level, name, 1, size, score)
else
node.children[name].count += 1
node.children[name].score += score
else
node.children[new_node.name].count += new_node.count
node.children[new_node.name].score += new_node.score
+ node.children[new_node.name].size += new_node.size
end
node = node.children[new_node.name]
end
def flatten(node = @tree, list = [])
- list << TaxNode.new(node.level, node.name, node.count, node.score)
+ list << TaxNode.new(node.level, node.name, node.count, node.size, node.score)
node.children.each do |name, child|
list = flatten(child, list.dup)
self
end
- def dump(node = @tree)
- indent = 0
-
- case node.level
- when "kingdom" then indent = 2
- when "phylum" then indent = 4
- when "class" then indent = 6
- when "order" then indent = 8
- when "family" then indent = 10
- when "genus" then indent = 12
- when "species" then indent = 14
- end
-
- puts (" " * indent) + "#{node.name} (#{node.level})"
-
- node.children.each do |name, child|
- dump(child)
- end
- end
-
private
def expand_level(level)
end
class TaxNode
- attr_accessor :level, :name, :count, :score, :children
+ attr_accessor :level, :name, :count, :size, :score, :children
- def initialize(level, name, count, score)
+ def initialize(level, name, count, size, score)
@level = level
@name = name
@count = count
+ @size = size
@score = score
@children = {}
end
record[:REC_TYPE] = "Classification"
record[:LEVEL] = @level
record[:NAME] = @name
- record[:COUNT] = @count
+ record[:COUNT] = @size
record[:SCORE] = @count == 0 ? 0.0 : (@score / @count).round(2)
record
end
casts = []
-casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'size', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record[:Q_ID] and record[:S_ID] and record[:SCORE]
+ size = 1
+
+ if options[:size]
+ if record[:Q_ID].match(/_(\d+)$/)
+ size = $1.to_i
+ else
+ raise BiopiecesError, "Could not extract size from Q_ID: #{record[:Q_ID]}"
+ end
+ end
+
tax_hash[record[:Q_ID]] = TaxTree.new unless tax_hash[record[:Q_ID]]
- tax_hash[record[:Q_ID]].add_gg(record[:S_ID], record[:SCORE].to_f)
+ tax_hash[record[:Q_ID]].add_gg(record[:S_ID], record[:SCORE].to_f, size)
else
output.puts record
end