# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'inline'
-#autoload :NArray, 'narray'
module Ambiguity
- # IUPAC alphabet and binary encoding of the same
- # http://en.wikipedia.org/wiki/Nucleic_acid_notation
-
- AMBIGUITY_STR = "ACGTUWSMKRYBDHVNacgtuwsmkrybdhvn"
- AMBIGUITY_BIN = "\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f"
-
- # Class method to convert a sequence string to a bit string
- # where the bit positions in each char correspond to the following:
- # A = 1000
- # C = 0100
- # G = 0010
- # T = 0001
- # Ambiguity codes combine these bits, e.g. W (A or T) = 1001 and N = 1111.
- def self.to_bin(seq)
- seq.tr(AMBIGUITY_STR, AMBIGUITY_BIN)
- end
-
- # Class method to convert a sequence string to a byte NArray of its bit encoding.
- def self.to_na(seq)
- NArray.to_na(self.to_bin(seq), 'byte')
- end
-
- # Class method to calculate the Hamming Distance between two sequences:
- # the count of positions whose encoded bit fields share no bits (AND is 0).
- def self.hamming_distance(seq1, seq2)
- (self.to_na(seq1) & self.to_na(seq2)).eq(0).sum
- end
-
def add_ambiguity_macro inline_builder
# Macro for matching nucleotides including ambiguity codes.
inline_builder.prefix %{
- #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0)
+ #define MATCH(A,B) ((bitmap[(int) A] & bitmap[(int) B]) != 0)
}
# Bitmap for matching nucleotides including ambiguity codes.