6 /*********************************** DECLARATIONS ************************************/
19 typedef struct _search_space search_space;
31 typedef struct _match match;
40 typedef struct _list list;
43 /*********************************** FUNCTIONS ************************************/
46 search_space *search_space_new( char *q_seq, char *s_seq, unsigned int q_beg, unsigned int s_beg, unsigned int q_end, unsigned int s_end )
48 search_space *new = NULL;
50 new = mem_get( sizeof( search_space ) );
52 assert( q_seq != NULL );
53 assert( s_seq != NULL );
56 assert( q_end > q_beg );
57 assert( s_end > s_beg );
70 match *match_new( unsigned int q_beg, unsigned int s_beg, unsigned int len )
74 new = mem_get( sizeof( match ) );
86 void match_add( match **old_ppt, match **new_ppt )
88 match *old = *old_ppt;
89 match *new = *new_ppt;
91 new->next = old->next;
96 void match_print( match *match_pt )
98 printf( "q_beg: %d\ts_beg: %d\tlen: %d\tscore: %f\n", match_pt->q_beg, match_pt->s_beg, match_pt->len, match_pt->score );
102 void matches_print( match *matches )
106 for ( m = matches; m != NULL; m = m->next ) {
112 void match_expand_forward( match *match_pt, search_space *ss )
114 while ( match_pt->q_beg + match_pt->len <= ss->q_end &&
115 match_pt->s_beg + match_pt->len <= ss->s_end &&
116 toupper( ss->q_seq[ match_pt->q_beg + match_pt->len ] ) == toupper( ss->s_seq[ match_pt->s_beg + match_pt->len ] ) )
123 void match_expand_backward( match *match_pt, search_space *ss )
125 while ( match_pt->q_beg > ss->q_beg &&
126 match_pt->s_beg > ss->q_beg &&
127 toupper( ss->q_seq[ match_pt->q_beg ] ) == toupper( ss->s_seq[ match_pt->s_beg ] ) )
136 void match_expand( match *match_pt, search_space *ss )
138 match_expand_forward( match_pt, ss );
139 match_expand_backward( match_pt, ss );
143 bool match_redundant( match *matches, match *new )
145 // FIXME replace with binary search scheme. (test if matches always are sorted).
149 for ( m = matches; m != NULL; m = m->next )
151 if ( new->q_beg >= m->q_beg &&
152 new->s_beg >= m->s_beg &&
153 new->q_beg + new->len - 1 <= m->q_beg + m->len - 1 &&
154 new->s_beg + new->len - 1 <= m->s_beg + m->len - 1
169 new = mem_get( sizeof( list ) );
178 void list_add( list **old_ppt, list **new_ppt )
180 list *old = *old_ppt;
181 list *new = *new_ppt;
183 new->next = old->next;
188 void list_print( list *list_pt )
192 for ( node = list_pt; node != NULL; node = node->next ) {
193 printf( "node->val: %d\n", node->val );
198 void seq_index( hash **index_ppt, char *seq, unsigned int seq_beg, unsigned int seq_end, unsigned int word_size )
200 hash *index = *index_ppt;
206 assert( 0 <= seq_beg );
207 assert( seq_beg < seq_end );
208 assert( word_size > 0 );
210 word = mem_get_zero( sizeof( char ) * word_size + 1 );
212 for ( i = seq_beg; i < seq_end - word_size + 2; i++ )
214 memcpy( word, &seq[ i ], word_size );
220 if ( ( old = hash_get( index, word ) ) != NULL ) {
221 list_add( &old, &new );
223 hash_add( index, word, new );
231 void index_print( hash *index )
233 hash_elem *bucket = NULL;
234 list *pos_list = NULL;
238 printf( "\ntable_size: %zu mask: %zu elem_count: %zu\n", index->table_size, index->mask, index->nmemb );
240 for ( i = 0; i < index->table_size; i++ )
242 bucket = index->table[ i ];
244 while ( bucket != NULL )
246 printf( "i: %zu key: %s val: ", i, bucket->key );
248 pos_list = bucket->val;
250 for ( node = pos_list; node != NULL; node = node->next ) {
251 printf( "%d,", node->val );
256 bucket = bucket->next;
262 unsigned int matches_find_s( match **matches_ppt, hash *index, search_space *ss, unsigned int word_size )
264 match *matches = *matches_ppt;
265 unsigned int s_pos = 0;
266 unsigned int match_count = 0;
268 list *pos_list = NULL;
272 word = mem_get_zero( sizeof( char ) * word_size + 1 );
274 for ( s_pos = ss->s_beg; s_pos < ss->s_end - word_size + 2; s_pos++ )
276 memcpy( word, &ss->s_seq[ s_pos ], word_size );
278 if ( ( pos_list = hash_get( index, word ) ) != NULL )
280 for ( node = pos_list; node != NULL; node = node->next )
282 new = match_new( node->val, s_pos, word_size );
284 if ( ! match_redundant( matches, new ) )
286 match_expand( new, ss );
288 match_add( &matches, &new );
296 printf( "her\n" ); // DEBUG
297 matches_print( matches );
298 index_print( index );
307 unsigned int matches_find( match **matches_ppt, search_space *ss, unsigned int word_size )
309 match *matches = *matches_ppt;
311 unsigned int match_count = 0;
313 assert( word_size > 0 );
315 if ( ss->q_end - ss->q_beg < ss->s_end - ss->s_beg )
317 seq_index( &index, ss->q_seq, ss->q_beg, ss->q_end, word_size );
319 match_count = matches_find_s( &matches, index, ss, word_size );
323 seq_index( &index, ss->s_seq, ss->s_beg, ss->s_end, word_size );
325 // match_count = matches_find_q( &matches, index, q_seq, s_seq, q_beg, q_end, word_size );
328 // index_destroy( index );
334 /*********************************** UNIT TESTS ************************************/
337 void test_search_space_new()
339 fprintf( stderr, " Testing search_space_new ... " );
341 search_space *new = NULL;
343 new = search_space_new( "ATCG", "GCTA", 0, 1, 2, 3 );
345 assert( new->q_seq == "ATCG" );
346 assert( new->s_seq == "GCTA" );
347 assert( new->q_beg == 0 );
348 assert( new->s_beg == 1 );
349 assert( new->q_end == 2 );
350 assert( new->s_end == 3 );
352 fprintf( stderr, "done.\n" );
356 void test_match_new()
358 fprintf( stderr, " Testing match_new ... " );
360 unsigned int q_beg = 1;
361 unsigned int s_beg = 1;
362 unsigned int len = 2;
365 new = match_new( q_beg, s_beg, len );
367 assert( new->next == NULL );
368 assert( new->q_beg == 1 );
369 assert( new->s_beg == 1 );
370 assert( new->len == 2 );
371 assert( new->score == 0.0 );
373 fprintf( stderr, "done.\n" );
377 void test_match_expand_forward()
379 fprintf( stderr, " Testing match_expand_forward ... " );
381 search_space *new_ss = NULL;
382 match *new_match = NULL;
384 new_ss = search_space_new( "ATCGG", "atcg", 0, 0, 4, 3 );
385 new_match = match_new( 1, 1, 2 );
387 match_expand_forward( new_match, new_ss );
389 assert( new_match->q_beg == 1 );
390 assert( new_match->s_beg == 1 );
391 assert( new_match->len == 3 );
393 fprintf( stderr, "done.\n" );
397 void test_match_expand_backward()
399 fprintf( stderr, " Testing match_expand_backward ... " );
401 search_space *new_ss = NULL;
402 match *new_match = NULL;
404 new_ss = search_space_new( "ATCGG", "atcg", 0, 0, 4, 3 );
405 new_match = match_new( 1, 1, 2 );
407 match_expand_backward( new_match, new_ss );
409 assert( new_match->q_beg == 0 );
410 assert( new_match->s_beg == 0 );
411 assert( new_match->len == 3 );
413 fprintf( stderr, "done.\n" );
417 void test_match_expand()
419 fprintf( stderr, " Testing match_expand ... " );
421 search_space *new_ss = NULL;
422 match *new_match = NULL;
424 new_ss = search_space_new( "ATCGG", "atcg", 0, 0, 4, 3 );
425 new_match = match_new( 1, 1, 2 );
427 match_expand( new_match, new_ss );
429 assert( new_match->q_beg == 0 );
430 assert( new_match->s_beg == 0 );
431 assert( new_match->len == 4 );
433 fprintf( stderr, "done.\n" );
437 void test_match_redundant_duplicate()
439 fprintf( stderr, " Testing match_redundant_duplicate ... " );
441 match *matches = NULL;
444 matches = match_new( 0, 1, 2 );
445 new = match_new( 0, 1, 2 );
447 assert( match_redundant( matches, new ) == TRUE );
449 fprintf( stderr, "done.\n" );
453 void test_match_redundant_upstream()
455 fprintf( stderr, " Testing match_redundant_upstream ... " );
457 match *matches = NULL;
460 matches = match_new( 5, 5, 2 );
461 new = match_new( 4, 4, 3 );
463 assert( match_redundant( matches, new ) == FALSE );
465 fprintf( stderr, "done.\n" );
469 void test_match_redundant_downstream()
471 fprintf( stderr, " Testing match_redundant_downstream ... " );
473 match *matches = NULL;
476 matches = match_new( 5, 5, 2 );
477 new = match_new( 6, 6, 2 );
479 assert( match_redundant( matches, new ) == FALSE );
481 fprintf( stderr, "done.\n" );
487 fprintf( stderr, " Testing list_new ... " );
493 assert( new->next == NULL );
494 assert( new->val == 0 );
496 fprintf( stderr, "done.\n" );
502 fprintf( stderr, " Testing list_add ... " );
513 list_add( &old, &new );
515 assert( old->val == 10 );
516 assert( old->next->val == 20 );
518 fprintf( stderr, "done.\n" );
522 void test_seq_index()
524 fprintf( stderr, " Testing seq_index ... " );
526 char *seq = "ATCGATCG";
527 unsigned int seq_beg = 0;
528 unsigned int seq_end = strlen( seq ) - 1;
529 unsigned int word_size = 2;
531 unsigned int size = 16;
534 index = hash_new( size );
536 seq_index( &index, seq, seq_beg, seq_end, word_size );
538 // index_print( index );
540 node = hash_get( index, "GA" );
542 assert( node->val == 3 );
544 fprintf( stderr, "done.\n" );
548 void test_matches_find_s()
550 fprintf( stderr, " Testing matches_find_s ... " );
552 search_space *ss = NULL;
553 unsigned int word_size = 2;
554 unsigned int match_count = 0;
555 match *matches = NULL;
557 unsigned int size = 16;
559 ss = search_space_new( "ATCG", "ATCG", 0, 0, 3, 3 );
560 // matches = match_new( 0, 0, 0 ); // FIXME: dummy match
561 matches = mem_get( sizeof( match ) );
563 index = hash_new( size );
565 seq_index( &index, ss->q_seq, ss->q_beg, ss->q_end, word_size );
567 match_count = matches_find_s( &matches, index, ss, word_size );
569 fprintf( stderr, "done.\n" );
573 void test_matches_find()
575 fprintf( stderr, " Testing matches_find ... " );
577 search_space *ss = NULL;
578 unsigned int word_size = 2;
579 unsigned int match_count = 0;
580 match *matches = NULL;
582 ss = search_space_new( "ATCG", "ATCG", 0, 0, 3, 3 );
584 match_count = matches_find( &matches, ss, word_size );
586 fprintf( stderr, "done.\n" );
592 fprintf( stderr, "Running all tests:\n" );
594 test_search_space_new();
597 test_match_expand_forward();
598 test_match_expand_backward();
600 test_match_redundant_duplicate();
601 test_match_redundant_upstream();
602 test_match_redundant_downstream();
609 // test_matches_find_q();
610 test_matches_find_s();
613 fprintf( stderr, "Done.\n\n" );
617 /*********************************** MAIN ************************************/
628 /***********************************************************************/