#include "fasta.h"
#include "list.h"
-#define BLOCK_SIZE_NT 4 /* one block holds 4 nucleotides. */
-#define BITS_IN_NT 2 /* two bits holds 1 nucleotide. */
-#define BITS_IN_BYTE 8 /* number of bits in one byte. */
-#define BLOCK_SPACE_MAX 2 /* maximum space between two blocks. */
-#define BLOCK_MASK ( BLOCK_SPACE_MAX - 1 ) /* mask for printing block space. */
-#define COUNT_ARRAY_NMEMB ( 1 << 30 ) /* number of objects in the unsigned int count array. */
-#define CUTOFF 1 /* minimum number of motifs in output. */
+#define BLOCK_SIZE_NT 4 /* one block holds 4 nucleotides. */
+#define BITS_IN_NT 2 /* two bits holds 1 nucleotide. */
+#define BITS_IN_BYTE 8 /* number of bits in one byte. */
+#define BLOCK_SPACE_MAX 64 /* maximum space between two blocks. */
+#define BLOCK_MASK ( ( BLOCK_SPACE_MAX << 1 ) - 1 ) /* mask for printing block space. */
+#define COUNT_ARRAY_NMEMB ( 1 << 30 ) /* number of objects in the unsigned int count array. */
+#define CUTOFF 10000 /* minimum number of motifs in output. */
#define add_A( c ) /* add 00 to the rightmost two bits of bin (i.e. do nothing). */
#define add_T( c ) ( c |= 3 ) /* add 11 on the rightmost two bits of c. */
void scan_list( list_sl *list, uint *count_array );
bitblock *bitblock_new();
uint blocks2motif( uchar bin1, uchar bin2, ushort dist );
-size_t filter_results( uint **array_ppt, size_t nmemb, uint cutoff );
int cmp_uint_desc( const void *a, const void *b );
-void count_array_print( uint *count_array, size_t nmemb );
+void count_array_print( uint *count_array, size_t nmemb, size_t cutoff );
void motif_print( uint motif, uint count );
void bitblock_list_print( list_sl *list );
void bitblock_print( bitblock *out );
static void test_bitblock_print();
static void test_bitblock_list_print();
static void test_scan_seq();
-static void test_filter_results();
-static void test_array_sort();
static void test_blocks2motif();
int i = 0;
seq_entry *entry = NULL;
uint *count_array = NULL;
- size_t new_nmemb = 0;
+// size_t new_nmemb = 0;
count_array = count_array_new( COUNT_ARRAY_NMEMB );
fprintf( stderr, "done.\n" );
}
- fprintf( stderr, "Filtering results (cutoff=%d) ... ", CUTOFF );
- new_nmemb = filter_results( &count_array, COUNT_ARRAY_NMEMB, CUTOFF );
- fprintf( stderr, "done.\n" );
-
- fprintf( stderr, "Sorting motifs: ... " );
- qsort( count_array, new_nmemb, sizeof( uint ), cmp_uint_desc );
- fprintf( stderr, "done.\n" );
-
fprintf( stderr, "Printing motifs: ... " );
- count_array_print( count_array, new_nmemb );
+ count_array_print( count_array, COUNT_ARRAY_NMEMB, CUTOFF );
fprintf( stderr, "done.\n" );
seq_destroy( entry );
motif |= dist;
- return motif;
-}
-
-
-size_t filter_results( uint **array_ppt, size_t nmemb, uint cutoff )
-{
- /* Martin A. Hansen, September 2008 */
-
- /* Iterates an array of bipartite scan results */
- /* determining how many elements are above the cutoff, */
- /* then allocate memory for a new array, copy the */
- /* values that were above the cutoff to the new array. */
- /* The number of elements on the new array is returned, */
- /* and the array is replaced. */
-
- uint *array = ( uint * ) *array_ppt;
- uint *new_array = NULL;
- size_t i = 0;
- size_t j = 0;
- size_t new_nmemb = 0;
-
- for ( i = 0; i < nmemb; i++ )
- {
- if ( array[ i ] >= cutoff ) {
- new_nmemb++;
- }
- }
-
- assert( new_nmemb > 0 );
-
- new_array = count_array_new( new_nmemb );
-
- for ( i = 0; i < nmemb; i++ )
- {
- if ( array[ i ] >= cutoff )
- {
- new_array[ j ] = array[ i ];
-
- j++;
- }
- }
-
- *array_ppt = new_array;
-
- return new_nmemb;
-}
-
-
-int cmp_uint_desc( const void *a, const void *b )
-{
- /* Martin A. Hansen, September 2008 */
-
- /* Compare function for qsort of an array of unsigned ints */
- /* to be sorted in descending order. */
-
- const uint *uint_a = ( const uint *) a;
- const uint *uint_b = ( const uint *) b;
+// motif_print( motif, 0 ); /* DEBUG */
- return *uint_b - *uint_a;
+ return motif;
}
-void count_array_print( uint *count_array, size_t nmemb )
+void count_array_print( uint *count_array, size_t nmemb, size_t cutoff )
{
/* Martin A. Hansen, Seqptember 2008. */
motif = i;
count = count_array[ i ];
- motif_print( motif, count );
+ if ( count >= cutoff ) {
+ motif_print( motif, count );
+ }
}
}
uchar bin2 = 0;
ushort dist = 0;
+ // printf( "%d\t", motif ); /* DEBUG */
+
dist = ( ushort ) motif & BLOCK_MASK;
motif >>= sizeof( uchar ) * BITS_IN_BYTE;
test_bitblock_print();
test_bitblock_list_print();
test_scan_seq();
- test_filter_results();
- test_array_sort();
test_blocks2motif();
fprintf( stderr, "All tests OK\n" );
scan_seq( seq, seq_len, count_array );
- count_array_print( count_array, nmemb ); /* DEBUG */
-
- fprintf( stderr, "done.\n" );
-}
-
-
-void test_filter_results()
-{
- fprintf( stderr, " Running test_filter_results ... " );
-
- uint array[] = { 4, 234, 23, 43, 12, 23, 1, 34 };
- uint *array_pt = array;
- size_t array_nmemb = sizeof( array ) / sizeof( uint );
- uint cutoff = 20;
- size_t size = 0;
- uint i = 0;
-
- size = filter_results( &array_pt, array_nmemb, cutoff );
-
- assert( size == 5 ); /* 5 elements greather than 20. */
-
- for ( i = 0; i < size; i ++ ) {
-// printf( "elem: %i\n", array_pt[ i ] );
- }
-
- fprintf( stderr, "done.\n" );
-}
-
-
-void test_array_sort()
-{
- fprintf( stderr, " Running test_array_sort ... " );
-
- uint array[] = { 4, 234, 23, 43, 12, 23, 1, 34 };
- int array_size = sizeof( array ) / sizeof( uint );
- int i = 0;
-
- for ( i = 0; i < array_size; i ++ ) {
-// printf( "elem: %i\n", array[ i ] );
- }
-
- qsort( array, array_size, sizeof( uint ), cmp_uint_desc );
-
- for ( i = 0; i < array_size; i ++ ) {
-// printf( "elem: %i\n", array[ i ] );
- }
-
- assert( array[ 0 ] == 234 ); /* 234 first in sorted array */
- assert( array[ array_size - 1 ] == 1 ); /* 1 last in sorted array */
+ // count_array_print( count_array, nmemb, 1 ); /* DEBUG */
fprintf( stderr, "done.\n" );
}