X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq%2Fambiguity.rb;h=4de50ade44ec1c718ee39f8b65a92530c36a6de0;hb=92dba07b3dd9837ed90212126998a8a1f9e00652;hp=7c21aedb62929837f8f2bb87202c48e71a216e97;hpb=e5b4c0d3fcbde504c49d6f69e2791bd74434d0d2;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq/ambiguity.rb b/code_ruby/lib/maasha/seq/ambiguity.rb index 7c21aed..4de50ad 100644 --- a/code_ruby/lib/maasha/seq/ambiguity.rb +++ b/code_ruby/lib/maasha/seq/ambiguity.rb @@ -23,41 +23,12 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'inline' -#autoload :NArray, 'narray' module Ambiguity - # IUPAC alphabet and binary encoding of the same - # http://en.wikipedia.org/wiki/Nucleic_acid_notation - - AMBIGUITY_STR = "ACGTUWSMKRYBDHVNacgtuwsmkrybdhvn" - AMBIGUITY_BIN = "\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f" - - # Class method to convert a sequence string to a bit string - # where the bit positions in each char corresponds to the following: - # A = 1000 - # C = 0100 - # G = 0010 - # T = 0001 - # And ambiguity codes are expressed using similar bit fields. - def self.to_bin(seq) - seq.tr(AMBIGUITY_STR, AMBIGUITY_BIN) - end - - # Class method to convert a bit string to a NArray. - def self.to_na(seq) - NArray.to_na(self.to_bin(seq), 'byte') - end - - # Class method to calculate the Hamming Distance between - # two bit fields encoding in NArrays. - def self.hamming_distance(seq1, seq2) - (self.to_na(seq1) & self.to_na(seq2)).eq(0).sum - end - def add_ambiguity_macro inline_builder # Macro for matching nucleotides including ambiguity codes. inline_builder.prefix %{ - #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0) + #define MATCH(A,B) ((bitmap[(int) A] & bitmap[(int) B]) != 0) } # Bitmap for matching nucleotides including ambiguity codes.