# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# IUPAC nucleotide pair ambiguity equivalents
-EQUAL = {
- :AA => true, :BU => true, :TH => true, :UY => true,
- :TT => true, :CB => true, :UH => true, :SC => true,
- :CC => true, :GB => true, :VA => true, :SG => true,
- :GG => true, :TB => true, :VC => true, :CS => true,
- :UU => true, :UB => true, :VG => true, :GS => true,
- :NA => true, :DA => true, :AV => true, :WA => true,
- :NT => true, :DG => true, :CV => true, :WT => true,
- :NC => true, :DT => true, :GV => true, :WU => true,
- :NG => true, :DU => true, :KG => true, :AW => true,
- :NU => true, :AD => true, :KT => true, :TW => true,
- :AN => true, :GD => true, :KU => true, :UW => true,
- :TN => true, :TD => true, :GK => true, :RA => true,
- :CN => true, :UD => true, :TK => true, :RG => true,
- :GN => true, :HA => true, :UK => true, :AR => true,
- :UN => true, :HC => true, :YC => true, :GR => true,
- :NN => true, :HT => true, :YT => true, :MA => true,
- :BC => true, :HU => true, :YU => true, :MC => true,
- :BG => true, :AH => true, :CY => true, :AM => true,
- :BT => true, :CH => true, :TY => true, :CM => true,
-}
+# IUPAC nucleotide pair ambiguity equivalents are saved in an
+# array of bit fields.
+
+BIT_A = 1 << 0
+BIT_T = 1 << 1
+BIT_C = 1 << 2
+BIT_G = 1 << 3
+
+EQUAL = Array.new(256, 0)
+EQUAL['A'.ord] = BIT_A
+EQUAL['T'.ord] = BIT_T
+EQUAL['U'.ord] = BIT_T
+EQUAL['C'.ord] = BIT_C
+EQUAL['G'.ord] = BIT_G
+EQUAL['M'.ord] = (BIT_A|BIT_C)
+EQUAL['R'.ord] = (BIT_A|BIT_G)
+EQUAL['W'.ord] = (BIT_A|BIT_T)
+EQUAL['S'.ord] = (BIT_C|BIT_G)
+EQUAL['Y'.ord] = (BIT_C|BIT_T)
+EQUAL['K'.ord] = (BIT_G|BIT_T)
+EQUAL['B'.ord] = (BIT_C|BIT_G|BIT_T)
+EQUAL['D'.ord] = (BIT_A|BIT_G|BIT_T)
+EQUAL['H'.ord] = (BIT_A|BIT_C|BIT_T)
+EQUAL['V'.ord] = (BIT_A|BIT_C|BIT_G)
+EQUAL['N'.ord] = (BIT_A|BIT_C|BIT_G|BIT_T)
# Module containing code to locate nucleotide patterns in sequences allowing for
# ambiguity codes and a given maximum edit distance.
# returned in an Array.
def scan(pattern, pos = 0, max_edit_distance = 0)
matches = []
- offset = pos
- while match = match(pattern, offset, max_edit_distance)
+ while match = match(pattern, pos, max_edit_distance)
if block_given?
yield match
else
matches << match
end
- offset = match.pos + 1
+ pos = match.pos + 1
end
return matches unless block_given?
new_vector = @vector.dup
(0 ... @pattern.length).each do |i|
- if EQUAL[(@seq[@pos] + @pattern[i]).upcase.to_sym]
+ if match?(@seq[@pos], @pattern[i])
new_vector[i + 1] = @vector[i].dup
new_vector[i + 1].matches += 1
else
elsif insertion?(mismatch, insertion, deletion)
insertion.insertions += 1
new_vector[i + 1] = insertion
- else
- raise "AAAAarrgh"
end
end
end
@vector = new_vector
end
+ # Method to determine if a match occurred.
+ def match?(char1, char2)
+ (EQUAL[char1.upcase.ord] & EQUAL[char2.upcase.ord]) != 0
+ end
+
# Method to determine if a mismatch occured.
def mismatch?(mismatch, insertion, deletion)
if mismatch.edit_distance <= insertion.edit_distance and