#!/usr/bin/env ruby # Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # http://www.gnu.org/copyleft/gpl.html # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # This program is part of the Biopieces framework (www.biopieces.org). # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Classify records with taxonomy information. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'pp' require 'maasha/biopieces' class TaxTree def initialize @tree = TaxNode.new("root", "Root", 0, 0, 0.0) end # Method to add to the TaxTree a GreenGenes entry. def add_gg(s_id, score, size) node = @tree s_id.scan(/ ([\w])__([^;]+)/) do level = expand_level($1) name = $2 if node.children[name].nil? node.children[name] = TaxNode.new(level, name, 1, size, score) else node.children[name].count += 1 node.children[name].score += score end node = node.children[name] end end def merge(tree) node = @tree tree.flatten.each do |new_node| next if new_node.level == 'root' new_node.score = new_node.score / new_node.count new_node.count = 1 if node.children[new_node.name].nil? node.children[new_node.name] = new_node else node.children[new_node.name].count += new_node.count node.children[new_node.name].score += new_node.score node.children[new_node.name].size += new_node.size end node = node.children[new_node.name] end end def flatten(node = @tree, list = []) list << TaxNode.new(node.level, node.name, node.count, node.size, node.score) node.children.each do |name, child| list = flatten(child, list.dup) end list end def lowest_common_ancestor(node = @tree) node.children = {} if node.children.size > 1 node.children.each do |name, child| lowest_common_ancestor(child) end end def each self.flatten.each do |node| yield node end self end private def expand_level(level) hash = { 'd' => "domain", 'k' => "kingdom", 'p' => "phylum", 'c' => "class", 'o' => "order", 'f' => "family", 'g' => "genus", 's' => "species" } raise "unknown level: #{level}" unless hash[level] hash[level] end class TaxNode attr_accessor :level, :name, :count, :size, :score, :children def initialize(level, name, count, size, score) @level = level @name = name @count = count @size = size @score = score @children = {} end def to_bp record = {} record[:REC_TYPE] = "Classification" record[:LEVEL] = @level record[:NAME] = @name record[:COUNT] = @size record[:SCORE] = @count == 0 ? 0.0 : (@score / @count).round(2) record end end end casts = [] casts << {:long=>'LCA', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'size', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) tax_tree = TaxTree.new tax_hash = {} Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| if record[:Q_ID] and record[:S_ID] and record[:SCORE] size = 1 if options[:size] if record[:Q_ID].match(/_(\d+)$/) size = $1.to_i else raise BiopiecesError, "Could not extract size from Q_ID: #{record[:Q_ID]}" end end tax_hash[record[:Q_ID]] = TaxTree.new unless tax_hash[record[:Q_ID]] tax_hash[record[:Q_ID]].add_gg(record[:S_ID], record[:SCORE].to_f, size) else output.puts record end end if options[:LCA] tax_hash.each do |q_id, tree| tree.lowest_common_ancestor end end tax_hash.each do |q_id, tree| tax_tree.merge(tree) end tax_tree.each do |node| output.puts node.to_bp unless node.level == 'root' end end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< __END__