10 BYTES_IN_OLIGO = BYTES_IN_BIN + BYTES_IN_POS
15 def self.find(q_seq, s_seq, space, kmer_size)
16 m = self.new(q_seq, s_seq, space, kmer_size)
20 def initialize(q_seq, s_seq, space, kmer_size)
21 q_space_beg = space.q_min
22 q_space_end = space.q_max
23 s_space_beg = space.s_min
24 s_space_end = space.s_max
28 q_index = Index.new(q_space_beg, q_space_end, q_seq.seq, kmer_size, kmer_size)
29 s_index = Index.new(s_space_beg, s_space_end, s_seq.seq, kmer_size, 1)
31 find_mums(q_index, s_index, kmer_size)
34 match.expand(q_seq.seq, s_seq.seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
46 def find_mums(q_index, s_index, kmer_size)
47 q_index.each do |q_oligo|
48 if s_pos = s_index[q_oligo]
49 q_pos = q_oligo.unpack("II").last
51 last_match = @mums.last
52 new_match = Match.new(q_pos, s_pos, kmer_size)
55 last_match.q_beg + last_match.length == new_match.q_beg and
56 last_match.s_beg + last_match.length == new_match.s_beg
58 last_match.length += kmer_size
67 def initialize(space_beg, space_end, seq, kmer_size, step_size)
68 index_size = (space_end - space_beg) * BYTES_IN_OLIGO
69 index = "\0" * index_size
70 index_size = create_index_C(space_beg, space_end, seq, kmer_size, step_size, index)
71 sort_by_oligo_C(index, index_size)
72 @index_size = uniq_oligo_C(index, index_size)
74 @index = index[0 ... @index_size * BYTES_IN_OLIGO]
78 (0 ... @index_size - 1).each do |i|
79 yield @index[BYTES_IN_OLIGO * i ... BYTES_IN_OLIGO * i + BYTES_IN_OLIGO]
84 search_index_C(@index, @index_size, oligo)
91 unsigned char nuc2bin[256] = {
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
97 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120 int oligo_cmp(const void *a, const void *b)
122 oligo *ia = (oligo *) a;
123 oligo *ib = (oligo *) b;
125 return (int) (ia->bin - ib->bin);
130 VALUE create_index_C(
139 unsigned int space_beg = NUM2UINT(_space_beg);
140 unsigned int space_end = NUM2UINT(_space_end);
141 unsigned char *seq = (unsigned char *) StringValuePtr(_seq);
142 unsigned int kmer_size = NUM2UINT(_kmer_size);
143 unsigned int step_size = NUM2UINT(_step_size);
144 oligo *index = (oligo *) StringValuePtr(_index);
147 unsigned int mask = (1 << (2 * kmer_size)) - 1;
148 unsigned int bin = 0;
149 unsigned int size = 0;
152 for (i = space_beg; i < space_beg + kmer_size; i++)
155 bin |= nuc2bin[seq[i]];
159 new.pos = i - kmer_size;
162 while (i <= space_end)
165 bin |= nuc2bin[seq[i]];
169 if ((i % step_size) == 0) {
170 new.bin = (bin & mask);
171 new.pos = i - kmer_size;
176 return UINT2NUM(size);
181 void sort_by_oligo_C(
186 oligo *index = (oligo *) StringValuePtr(_index);
187 unsigned int index_size = NUM2UINT(_index_size);
189 qsort(index, index_size, sizeof(oligo), oligo_cmp);
199 oligo *index = (oligo *) StringValuePtr(_index);
200 unsigned int index_size = NUM2UINT(_index_size);
202 unsigned int new_index_size = 0;
206 for (i = 1; i < index_size; i++)
208 if (index[i].bin != index[j].bin)
211 index[j] = index[i]; // Move it to the front
215 new_index_size = j + 1;
217 return UINT2NUM(new_index_size);
222 VALUE search_index_C(
228 oligo *index = (oligo *) StringValuePtr(_index);
229 unsigned int index_size = NUM2UINT(_index_size);
230 oligo *item = (oligo *) StringValuePtr(_item);
231 oligo *result = NULL;
233 result = (oligo *) bsearch(item, index, index_size, sizeof(oligo), oligo_cmp);
238 return UINT2NUM(result->pos);
245 attr_accessor :length, :score
246 attr_reader :q_beg, :s_beg
248 def initialize(q_beg, s_beg, length)
263 # Method to expand a match as far as possible to the left and right
264 # within a given search space.
265 def expand(q_seq, s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
266 length = expand_left_C(q_seq, s_seq, @q_beg, @s_beg, q_space_beg, s_space_beg)
271 length = expand_right_C(q_seq, s_seq, q_end, s_end, q_space_end, s_space_end)
277 # Method to expand a match as far as possible to the right within a given
279 def expand_right(q_seq, s_seq, q_space_end, s_space_end)
280 while q_end + 1 <= q_space_end and s_end + 1 <= s_space_end and q_seq[q_end + 1] == s_seq[s_end + 1]
286 # Method to expand a match as far as possible to the left within a given
298 unsigned char *q_seq = (unsigned char *) StringValuePtr(_q_seq);
299 unsigned char *s_seq = (unsigned char *) StringValuePtr(_s_seq);
300 unsigned int q_beg = NUM2UINT(_q_beg);
301 unsigned int s_beg = NUM2UINT(_s_beg);
302 unsigned int q_space_beg = NUM2UINT(_q_space_beg);
303 unsigned int s_space_beg = NUM2UINT(_s_space_beg);
305 unsigned int len = 0;
307 while (q_beg > q_space_beg && s_beg > s_space_beg && q_seq[q_beg - 1] == s_seq[s_beg - 1])
314 return UINT2NUM(len);
319 VALUE expand_right_C(
328 unsigned char *q_seq = (unsigned char *) StringValuePtr(_q_seq);
329 unsigned char *s_seq = (unsigned char *) StringValuePtr(_s_seq);
330 unsigned int q_end = NUM2UINT(_q_end);
331 unsigned int s_end = NUM2UINT(_s_end);
332 unsigned int q_space_end = NUM2UINT(_q_space_end);
333 unsigned int s_space_end = NUM2UINT(_s_space_end);
335 unsigned int len = 0;
337 while (q_end + 1 <= q_space_end && s_end + 1 <= s_space_end && q_seq[q_end + 1] == s_seq[s_end + 1])
344 return UINT2NUM(len);