1 # Copyright (C) 2007-2011 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
28 require 'maasha/fasta'
30 class AlignError < StandardError; end;
36 ALPH_DNA = %w{A T C G}
37 ALPH_AMBI = %w{A T C G M R W S Y K V H D B N}
50 BIT_V = BIT_A | BIT_C | BIT_G
51 BIT_H = BIT_A | BIT_C | BIT_T
52 BIT_D = BIT_A | BIT_G | BIT_T
53 BIT_B = BIT_C | BIT_G | BIT_T
54 BIT_N = BIT_G | BIT_A | BIT_T | BIT_C
74 TR_NUC = "-" + ALPH_AMBI.join("").downcase
75 TR_HEX = [BIT_INDEL].pack("C") + BITMAP.pack("C*")
82 # Adding an initialize method to the existing Fasta class.
89 # Class for aligning sequences.
91 # Class method to align sequences in a list of Seq objects and
92 # return these as a new list of Seq objects.
93 def self.muscle(entries)
97 Open3.popen3(cmd) do |stdin, stdout, stderr|
98 entries.each { |entry| stdin.puts entry.to_fasta }
101 fa = Fasta.new(stdout)
112 return aligned unless block_given?
115 # Class method to create a consensus sequence from a list of Seq objects and
116 # return a new Seq object with the consensus.
117 def self.consensus(entries)
118 cons = Consensus.new(entries.first.length)
120 entries.each { |entry| cons.add(entry) }
126 # Method to initialize a Consensus object given a Seq object, which is added
128 def initialize(length)
131 @count = NArray.int(@length)
132 @freq = NArray.int(@length, ALPH_DNA.size)
133 @qual = NArray.int(@length, ALPH_DNA.size)
134 @mask_A = NArray.byte(@length).fill!(BIT_A)
135 @mask_T = NArray.byte(@length).fill!(BIT_T)
136 @mask_C = NArray.byte(@length).fill!(BIT_C)
137 @mask_G = NArray.byte(@length).fill!(BIT_G)
141 # Method to add a Seq entry to a Consensus object.
143 seq = NArray.to_na(entry.seq.downcase.tr(TR_NUC, TR_HEX), "byte")
147 mask_A = (seq & @mask_A) > 0
148 mask_T = (seq & @mask_T) > 0
149 mask_C = (seq & @mask_C) > 0
150 mask_G = (seq & @mask_G) > 0
152 @freq[true, ROW_A] += mask_A
153 @freq[true, ROW_T] += mask_T
154 @freq[true, ROW_C] += mask_C
155 @freq[true, ROW_G] += mask_G
157 unless entry.qual.nil?
160 qual = NArray.to_na(entry.qual, "byte") - QUAL_BASE
162 @qual[true, ROW_A] += mask_A * qual
163 @qual[true, ROW_T] += mask_T * qual
164 @qual[true, ROW_C] += mask_C * qual
165 @qual[true, ROW_G] += mask_G * qual
172 new_seq.seq = consensus_seq
173 new_seq.qual = consensus_qual if @has_qual
181 # Method that calculates the sum for each column except the indel row and
182 # returns the sum in an NArray.
187 # Method that locates the maximum value for each column and
188 # returns this in an NArray.
194 freq = @freq[true, [0 ... ALPH_DNA.size]]
195 @qual / (freq + freq.eq(0))
199 (@freq / freq_total.to_type("float") * 100).round
202 # Method to calculate a consensus sequence from a Consensus object.
204 fp = freq_percent > CONSENSUS
205 cons = NArray.byte(@length)
206 cons |= fp[true, ROW_A] *= BIT_A
207 cons |= fp[true, ROW_T] *= BIT_T
208 cons |= fp[true, ROW_C] *= BIT_C
209 cons |= fp[true, ROW_G] *= BIT_G
211 cons *= @count > @count.max * INDEL_RATIO
213 cons.to_s.tr!(TR_HEX, TR_NUC).upcase
217 q = (@qual / (@count + @count.eq(0))).max(1)
219 (q.to_type("byte") + QUAL_BASE).to_s
223 filter = qual_mean > QUAL_MIN