1 # Copyright (C) 2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 #autoload :NArray, 'narray'
29 # IUPAC alphabet and binary encoding of the same
30 # http://en.wikipedia.org/wiki/Nucleic_acid_notation
32 AMBIGUITY_STR = "ACGTUWSMKRYBDHVNacgtuwsmkrybdhvn"
33 AMBIGUITY_BIN = "\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f\x08\x04\x02\x01\x01\x09\x06\x0c\x03\x0a\x05\x07\x0b\x0d\x0e\x0f"
35 def self.hamming_distance(seq1, seq2)
36 (NArray.to_na(seq1.tr(AMBIGUITY_STR, AMBIGUITY_BIN), 'byte') &
37 NArray.to_na(seq2.tr(AMBIGUITY_STR, AMBIGUITY_BIN), 'byte')).eq(0).sum
40 def add_ambiguity_macro inline_builder
41 # Macro for matching nucleotides including ambiguity codes.
42 inline_builder.prefix %{
43 #define MATCH(A,B) ((bitmap[A] & bitmap[B]) != 0)
46 # Bitmap for matching nucleotides including ambiguity codes.
47 # For each value bits are set from the left: bit pos 1 for A,
48 # bit pos 2 for T, bit pos 3 for C, and bit pos 4 for G.
49 inline_builder.prefix %{
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
56 0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
57 0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
58 0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0