class AlignError < StandardError; end;
-CONSENSUS = 20
+CONSENSUS = 0.20
INDEL_RATIO = 0.5
-QUAL_MIN = 15
+FREQ_MIN = 2
QUAL_BASE = 64
ALPH_DNA = %w{A T C G}
ALPH_AMBI = %w{A T C G M R W S Y K V H D B N}
BIT_INDEL = 0
-BIT_A = 1 << 0
-BIT_T = 1 << 1
-BIT_C = 1 << 2
-BIT_G = 1 << 3
+# BIT_A = 1 << 0
+# BIT_T = 1 << 1
+# BIT_C = 1 << 2
+# BIT_G = 1 << 3
+
BIT_M = BIT_A | BIT_C
BIT_R = BIT_A | BIT_G
BIT_W = BIT_A | BIT_T
# Class for aligning sequences.
class Align
+ attr_reader :entries
+
# Class method to align sequences in a list of Seq objects and
# return these as a new list of Seq objects.
def self.muscle(entries, verbose = false)
end
end
- result
+ self.new(result)
+ end
+
+ # Method to initialize an Align object with a list of aligned Seq objects.
+ def initialize(entries)
+ @entries = entries
+ end
+
+ # Method that returns the length of the alignment.
+ def length
+ @entries.first.length
+ end
+
+ # Method that returns the number of members or sequences in the alignment.
+ def members
+ @entries.size
end
- # Class method to create a consensus sequence from a list of Seq objects and
+ # Method to create a consensus sequence from an Align object and
# return a new Seq object with the consensus.
- def self.consensus(entries)
- cons = Consensus.new(entries.first.length)
+ def consensus
+ cons = Consensus.new(self.length)
- entries.each { |entry| cons.add(entry) }
+ @entries.each { |entry| cons.add(entry) }
cons.to_seq
end
+ # Method to pretty print an alignment from an Align object.
+ def to_s
+ cons = self.consensus
+ cons.mask_seq_soft!(20) unless cons.qual.nil?
+
+ max_name = @entries.group_by { |entry| entry.seq_name.length }.max.first
+
+ @entries.each do |entry|
+ puts entry.seq_name + (" " * (max_name + 3 - entry.seq_name.length )) + entry.seq
+ end
+
+ output = ""
+ output << " " * (max_name + 3) + cons.seq + $/
+ output << " " * (max_name + 3) + cons.qual.tr("[@-h]", " ..........ooooooooooOOOOOOOOOO") unless cons.qual.nil?
+ output
+ end
+
+ private
+
class Consensus
# Method to initialize a Consensus object given a Seq object, which is added
# to the Consensus.
def initialize(length)
@length = length
- @count = NArray.int(@length)
@freq = NArray.int(@length, ALPH_DNA.size)
@qual = NArray.int(@length, ALPH_DNA.size)
@mask_A = NArray.byte(@length).fill!(BIT_A)
@mask_C = NArray.byte(@length).fill!(BIT_C)
@mask_G = NArray.byte(@length).fill!(BIT_G)
@has_qual = false
+ @count = 0
end
# Method to add a Seq entry to a Consensus object.
def add(entry)
- seq = NArray.to_na(entry.seq.downcase.tr(TR_NUC, TR_HEX), "byte")
+ @count += 1
- @count += seq > 0
+ seq = NArray.to_na(entry.seq.downcase.tr(TR_NUC, TR_HEX), "byte")
mask_A = (seq & @mask_A) > 0
mask_T = (seq & @mask_T) > 0
end
def to_seq
- #qual_filter
new_seq = Seq.new
new_seq.seq = consensus_seq
new_seq.qual = consensus_qual if @has_qual
private
- # Method that calculates the sum for each column except the indel row and
- # returns the sum in an NArray.
- def freq_total
- @freq.sum(1)
- end
-
- # Method that locates the maximum value for each column and
- # returns this in an NArray.
- def freq_max
- @freq.max(1)
- end
-
- def qual_mean
- freq = @freq[true, [0 ... ALPH_DNA.size]]
- @qual / (freq + freq.eq(0))
- end
-
- def freq_percent
- (@freq / freq_total.to_type("float") * 100).round
- end
-
# Method to calculate a consensus sequence from a Consensus object.
def consensus_seq
- fp = freq_percent > CONSENSUS
+ fp = (@freq.to_type("float") / @count) > CONSENSUS
+
+ fp *= @freq >= FREQ_MIN
+
cons = NArray.byte(@length)
cons |= fp[true, ROW_A] *= BIT_A
cons |= fp[true, ROW_T] *= BIT_T
cons |= fp[true, ROW_C] *= BIT_C
cons |= fp[true, ROW_G] *= BIT_G
-
- cons *= @count > @count.max * INDEL_RATIO
+# cons *= @freq.sum(1).to_type("float") / @count > INDEL_RATIO
cons.to_s.tr!(TR_HEX, TR_NUC).upcase
end
def consensus_qual
- q = (@qual / (@count + @count.eq(0))).max(1)
+ q = qual_mean.max(1)
(q.to_type("byte") + QUAL_BASE).to_s
end
- def qual_filter
- filter = qual_mean > QUAL_MIN
-
- # pp @freq
- # pp @qual
-
- # @freq *= filter
- # @qual *= filter
-
- # pp @freq
- # pp @qual
+ def qual_mean
+ @qual / (@freq + @freq.eq(0))
end
end
end