AMBIGUITY_STR = "ACGTUWSMKRYBDHVNacgtuwsmkrybdhvn"
AMBIGUITY_BIN = "\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f"
+ # Class method to convert a sequence string to a bit string
+ # where the bit positions in each char correspond to the following:
+ # A = 1000
+ # C = 0100
+ # G = 0010
+ # T = 0001
+ # And ambiguity codes are expressed using similar bit fields.
+ def self.to_bin(seq)
+ seq.tr(AMBIGUITY_STR, AMBIGUITY_BIN)
+ end
+
+ # Class method to convert a sequence string to an NArray of bit field bytes.
+ def self.to_na(seq)
+ NArray.to_na(self.to_bin(seq), 'byte')
+ end
+
+ # Class method to calculate the Hamming Distance between two sequences:
+ # the number of positions whose bit fields, encoded in NArrays, share no bit.
def self.hamming_distance(seq1, seq2)
- (NArray.to_na(seq1.tr(AMBIGUITY_STR, AMBIGUITY_BIN), 'byte') &
- NArray.to_na(seq2.tr(AMBIGUITY_STR, AMBIGUITY_BIN), 'byte')).eq(0).sum
+ (self.to_na(seq1) & self.to_na(seq2)).eq(0).sum
end
def add_ambiguity_macro inline_builder