require 'pp'
require 'open3'
require 'narray'
+require 'maasha/align/pair'
require 'maasha/fasta'
class AlignError < StandardError; end;
-ALPH_DNA = %w{A T C G}
-ALPH_AMBI = %w{A T C G M R W S Y K V H D B N}
+ALPH_DNA = %w{A T C G}
+ALPH_AMBI = %w{A T C G M R W S Y K V H D B N}
BIT_INDEL = 0
-# BIT_A = 1 << 0
-# BIT_T = 1 << 1
-# BIT_C = 1 << 2
-# BIT_G = 1 << 3
+BIT_A = 1 << 0
+BIT_T = 1 << 1
+BIT_C = 1 << 2
+BIT_G = 1 << 3
BIT_M = BIT_A | BIT_C
BIT_R = BIT_A | BIT_G
attr_accessor :options
attr_reader :entries
+ include PairAlign
+
# Class method to align sequences in a list of Seq objects and
# return these as a new list of Seq objects.
def self.muscle(entries, verbose = false)
Open3.popen3("muscle", "-quiet") do |stdin, stdout, stderr|
entries.each do |entry|
- raise AlignError, "Duplicate sequence name: #{entry.seq_name}" if index.has_key? entry.seq_name
+ raise AlignError, "Duplicate sequence name: #{entry.seq_name}" if index[entry.seq_name]
index[entry.seq_name] = entry.dup
self.new(result)
end
+
+ # Class method to create a pairwise alignment of two given Seq objects. The
+ # alignment is created by casting a search space the size of the sequences
+ # and save the best scoring match between the sequences and recurse into
+ # the left and right search spaced around this match. When all search spaces
+ # are exhausted the saved matches are used to insert gaps in the sequences.
+ def self.pair(q_entry, s_entry)
+ AlignPair.align(q_entry, s_entry)
+
+ self.new([q_entry, s_entry])
+ end
# Method to initialize an Align object with a list of aligned Seq objects.
def initialize(entries, options = {})
new_seq = Seq.new
new_seq.seq = consensus_seq
new_seq.qual = consensus_qual if @has_qual
- new_seq.type = "dna"
+ new_seq.type = :dna
new_seq
end
def na_add_entries
@entries.each_with_index do |entry, i|
@na_seq[true, i] = NArray.to_na(entry.seq.downcase.tr(TR_NUC, TR_HEX), "byte")
- @na_qual[true, i] = NArray.to_na(entry.qual, "byte") - SCORE_BASE if @has_qual
+ @na_qual[true, i] = NArray.to_na(entry.qual, "byte") - Seq::SCORE_BASE if @has_qual
end
end
# NArrays.
def consensus_calc
if @has_qual
- if @options.has_key? :quality_min
+ if @options[:quality_min]
mask = mask_quality_min
@na_seq *= mask
@na_qual *= mask
end
- if @options.has_key? :quality_mean
+ if @options[:quality_mean]
mask = mask_quality_mean
@na_seq *= mask
end
end
- if @options.has_key? :sequence_min
+ if @options[:sequence_min]
mask = mask_sequence_min
@na_seq *= mask
@na_qual *= mask if @has_qual
end
- if @options.has_key? :gap_max
+ if @options[:gap_max]
mask = mask_gap_max
@na_seq *= mask
@na_qual *= mask if @has_qual
end
- if @options.has_key? :residue_min
+ if @options[:residue_min]
mask = mask_residue_min
@na_seq *= mask
# Method to calculate a consensus quality from a Consensus object.
def consensus_qual
- (@na_qual.mean(1).round.to_type("byte") + SCORE_BASE).to_s
+ (@na_qual.mean(1).round.to_type("byte") + Seq::SCORE_BASE).to_s
end
end
end