From: martinahansen Date: Wed, 10 Sep 2008 06:45:09 +0000 (+0000) Subject: bipartite_scan polish X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ca3ebdb8325254ef22b2dbb25d0fd64f5e04815d;p=biopieces.git bipartite_scan polish git-svn-id: http://biopieces.googlecode.com/svn/trunk@251 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_c/Maasha/src/bipartite_scan.c b/code_c/Maasha/src/bipartite_scan.c index d677f66..639d26f 100644 --- a/code_c/Maasha/src/bipartite_scan.c +++ b/code_c/Maasha/src/bipartite_scan.c @@ -5,13 +5,13 @@ #include "fasta.h" #include "list.h" -#define BLOCK_SIZE_NT 4 /* one block holds 4 nucleotides. */ -#define BITS_IN_NT 2 /* two bits holds 1 nucleotide. */ -#define BITS_IN_BYTE 8 /* number of bits in one byte. */ -#define BLOCK_SPACE_MAX 2 /* maximum space between two blocks. */ -#define BLOCK_MASK ( BLOCK_SPACE_MAX - 1 ) /* mask for printing block space. */ -#define COUNT_ARRAY_NMEMB ( 1 << 30 ) /* number of objects in the unsigned int count array. */ -#define CUTOFF 1 /* minimum number of motifs in output. */ +#define BLOCK_SIZE_NT 4 /* one block holds 4 nucleotides. */ +#define BITS_IN_NT 2 /* two bits holds 1 nucleotide. */ +#define BITS_IN_BYTE 8 /* number of bits in one byte. */ +#define BLOCK_SPACE_MAX 64 /* maximum space between two blocks. */ +#define BLOCK_MASK ( ( BLOCK_SPACE_MAX << 1 ) - 1 ) /* mask for printing block space. */ +#define COUNT_ARRAY_NMEMB ( 1 << 30 ) /* number of objects in the unsigned int count array. */ +#define CUTOFF 10000 /* minimum number of motifs in output. */ #define add_A( c ) /* add 00 to the rightmost two bits of bin (i.e. do nothing). */ #define add_T( c ) ( c |= 3 ) /* add 11 on the rightmost two bits of c. */ @@ -72,9 +72,8 @@ void scan_seq( char *seq, size_t seq_len, uint *count_array ); void scan_list( list_sl *list, uint *count_array ); bitblock *bitblock_new(); uint blocks2motif( uchar bin1, uchar bin2, ushort dist ); -size_t filter_results( uint **array_ppt, size_t nmemb, uint cutoff ); int cmp_uint_desc( const void *a, const void *b ); -void count_array_print( uint *count_array, size_t nmemb ); +void count_array_print( uint *count_array, size_t nmemb, size_t cutoff ); void motif_print( uint motif, uint count ); void bitblock_list_print( list_sl *list ); void bitblock_print( bitblock *out ); @@ -86,8 +85,6 @@ static void test_bitblock_new(); static void test_bitblock_print(); static void test_bitblock_list_print(); static void test_scan_seq(); -static void test_filter_results(); -static void test_array_sort(); static void test_blocks2motif(); @@ -137,7 +134,7 @@ void run_scan( int argc, char *argv[] ) int i = 0; seq_entry *entry = NULL; uint *count_array = NULL; - size_t new_nmemb = 0; +// size_t new_nmemb = 0; count_array = count_array_new( COUNT_ARRAY_NMEMB ); @@ -152,16 +149,8 @@ void run_scan( int argc, char *argv[] ) fprintf( stderr, "done.\n" ); } - fprintf( stderr, "Filtering results (cutoff=%d) ... ", CUTOFF ); - new_nmemb = filter_results( &count_array, COUNT_ARRAY_NMEMB, CUTOFF ); - fprintf( stderr, "done.\n" ); - - fprintf( stderr, "Sorting motifs: ... " ); - qsort( count_array, new_nmemb, sizeof( uint ), cmp_uint_desc ); - fprintf( stderr, "done.\n" ); - fprintf( stderr, "Printing motifs: ... " ); - count_array_print( count_array, new_nmemb ); + count_array_print( count_array, COUNT_ARRAY_NMEMB, CUTOFF ); fprintf( stderr, "done.\n" ); seq_destroy( entry ); @@ -382,69 +371,13 @@ uint blocks2motif( uchar bin1, uchar bin2, ushort dist ) motif |= dist; - return motif; -} - - -size_t filter_results( uint **array_ppt, size_t nmemb, uint cutoff ) -{ - /* Martin A. Hansen, September 2008 */ - - /* Iterates an array of bipartite scan results */ - /* determining how many elements are above the cutoff, */ - /* then allocate memory for a new array, copy the */ - /* values that were above the cutoff to the new array. */ - /* The number of elements on the new array is returned, */ - /* and the array is replaced. */ - - uint *array = ( uint * ) *array_ppt; - uint *new_array = NULL; - size_t i = 0; - size_t j = 0; - size_t new_nmemb = 0; - - for ( i = 0; i < nmemb; i++ ) - { - if ( array[ i ] >= cutoff ) { - new_nmemb++; - } - } - - assert( new_nmemb > 0 ); - - new_array = count_array_new( new_nmemb ); - - for ( i = 0; i < nmemb; i++ ) - { - if ( array[ i ] >= cutoff ) - { - new_array[ j ] = array[ i ]; - - j++; - } - } - - *array_ppt = new_array; - - return new_nmemb; -} - - -int cmp_uint_desc( const void *a, const void *b ) -{ - /* Martin A. Hansen, September 2008 */ - - /* Compare function for qsort of an array of unsigned ints */ - /* to be sorted in descending order. */ - - const uint *uint_a = ( const uint *) a; - const uint *uint_b = ( const uint *) b; +// motif_print( motif, 0 ); /* DEBUG */ - return *uint_b - *uint_a; + return motif; } -void count_array_print( uint *count_array, size_t nmemb ) +void count_array_print( uint *count_array, size_t nmemb, size_t cutoff ) { /* Martin A. Hansen, Seqptember 2008. */ @@ -460,7 +393,9 @@ void count_array_print( uint *count_array, size_t nmemb ) motif = i; count = count_array[ i ]; - motif_print( motif, count ); + if ( count >= cutoff ) { + motif_print( motif, count ); + } } } @@ -478,6 +413,8 @@ void motif_print( uint motif, uint count ) uchar bin2 = 0; ushort dist = 0; + // printf( "%d\t", motif ); /* DEBUG */ + dist = ( ushort ) motif & BLOCK_MASK; motif >>= sizeof( uchar ) * BITS_IN_BYTE; @@ -530,8 +467,6 @@ void run_tests() test_bitblock_print(); test_bitblock_list_print(); test_scan_seq(); - test_filter_results(); - test_array_sort(); test_blocks2motif(); fprintf( stderr, "All tests OK\n" ); @@ -633,55 +568,7 @@ void test_scan_seq() scan_seq( seq, seq_len, count_array ); - count_array_print( count_array, nmemb ); /* DEBUG */ - - fprintf( stderr, "done.\n" ); -} - - -void test_filter_results() -{ - fprintf( stderr, " Running test_filter_results ... " ); - - uint array[] = { 4, 234, 23, 43, 12, 23, 1, 34 }; - uint *array_pt = array; - size_t array_nmemb = sizeof( array ) / sizeof( uint ); - uint cutoff = 20; - size_t size = 0; - uint i = 0; - - size = filter_results( &array_pt, array_nmemb, cutoff ); - - assert( size == 5 ); /* 5 elements greather than 20. */ - - for ( i = 0; i < size; i ++ ) { -// printf( "elem: %i\n", array_pt[ i ] ); - } - - fprintf( stderr, "done.\n" ); -} - - -void test_array_sort() -{ - fprintf( stderr, " Running test_array_sort ... " ); - - uint array[] = { 4, 234, 23, 43, 12, 23, 1, 34 }; - int array_size = sizeof( array ) / sizeof( uint ); - int i = 0; - - for ( i = 0; i < array_size; i ++ ) { -// printf( "elem: %i\n", array[ i ] ); - } - - qsort( array, array_size, sizeof( uint ), cmp_uint_desc ); - - for ( i = 0; i < array_size; i ++ ) { -// printf( "elem: %i\n", array[ i ] ); - } - - assert( array[ 0 ] == 234 ); /* 234 first in sorted array */ - assert( array[ array_size - 1 ] == 1 ); /* 1 last in sorted array */ + // count_array_print( count_array, nmemb, 1 ); /* DEBUG */ fprintf( stderr, "done.\n" ); }