From 500c0b6d1735a94d105dbe195dfec25b2c67556c Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 28 Aug 2008 05:12:19 +0000 Subject: [PATCH] read_fasta fix for stupid \r git-svn-id: http://biopieces.googlecode.com/svn/trunk@227 74ccb610-7750-0410-82ae-013aeee3265d --- code_c/Maasha/src/inc/common.h | 21 --- code_c/Maasha/src/inc/fasta.h | 4 +- code_c/Maasha/src/inc/list.h | 87 +++++++++- code_c/Maasha/src/inc/mem.h | 3 +- code_c/Maasha/src/lib/fasta.c | 24 +-- code_c/Maasha/src/lib/filesys.c | 8 +- code_c/Maasha/src/lib/hash.c | 4 +- code_c/Maasha/src/lib/list.c | 222 +++++++++++++++++++++----- code_c/Maasha/src/lib/mem.c | 12 +- code_c/Maasha/src/test/Makefile | 6 +- code_c/Maasha/src/test/test_fasta.c | 6 +- code_c/Maasha/src/test/test_filesys.c | 6 - code_c/Maasha/src/test/test_mem.c | 31 ++-- code_c/Maasha/src/testall.pl | 1 + code_c/Maasha/src/tetra_count.c | 70 ++++---- code_perl/Maasha/Fasta.pm | 5 +- 16 files changed, 355 insertions(+), 155 deletions(-) diff --git a/code_c/Maasha/src/inc/common.h b/code_c/Maasha/src/inc/common.h index 3414320..e62a08b 100644 --- a/code_c/Maasha/src/inc/common.h +++ b/code_c/Maasha/src/inc/common.h @@ -27,27 +27,6 @@ typedef char bool; #define die assert( DEBUG_EXIT ) -/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ - - -// At some point test if typdef struct list list will allow us to move all this stuff to list.h and list.c - - -/* Singly linked list with a pointer to the next element and a pointer to a value. */ -struct list -{ - struct list *next; - void *val; -}; - -/* Singly linked list with a pointer to the next element and an integer value. */ -struct list_int -{ - struct list *next; - int val; -}; - - /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ diff --git a/code_c/Maasha/src/inc/fasta.h b/code_c/Maasha/src/inc/fasta.h index 98f9272..68f0bed 100644 --- a/code_c/Maasha/src/inc/fasta.h +++ b/code_c/Maasha/src/inc/fasta.h @@ -10,7 +10,7 @@ bool fasta_get_entry( FILE *fp, seq_entry **entry_ppt ); void fasta_put_entry( seq_entry *entry ); /* Get all sequence entries from a FASTA file in a list. */ -void fasta_get_entries( FILE *fp, struct list **entries ); +//void fasta_get_entries( FILE *fp, struct list **entries ); /* Output all sequence entries from a list in FASTA format. */ -void fasta_put_entries( struct list *entries ); +//void fasta_put_entries( struct list *entries ); diff --git a/code_c/Maasha/src/inc/list.h b/code_c/Maasha/src/inc/list.h index 7e3850a..810aff0 100644 --- a/code_c/Maasha/src/inc/list.h +++ b/code_c/Maasha/src/inc/list.h @@ -1,21 +1,94 @@ + + +/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ + + +/* Singly linked list with a pointer to the next element and a pointer to a value. */ +struct _list +{ + struct _list *next; + void *val; +}; + +typedef struct _list list; + +/* Singly linked list with a pointer to the next element and an integer value. */ +struct _list_int +{ + struct _list_int *next; + int val; +}; + +typedef struct _list_int list_int; + +/* Doubly linked list node. */ +struct _node_dl +{ + struct _node_dl *next; /* Pointer to next node - NULL if last. */ + struct _node_dl *prev; /* Pointer to previous node - NULL if last. */ + void *val; /* Pointer to data value. */ +}; + +typedef struct _node_dl node_dl; + +/* Doubly linked list. */ +struct _list_dl +{ + node_dl *first; /* Pointer to first node - NULL for empty list. */ + node_dl *last; /* Pointer to last node - NULL for empty list. */ +}; + +typedef struct _list_dl list_dl; + +/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FUNCTION DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ + + +/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SINGLY LINLED LIST <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ + + /* Add a new singly linked list element with a pointer. */ -void list_add( struct list **list_ppt, void *val ); +void list_add( list **list_ppt, void *val ); /* Add a new singly linked list element with an integer. */ -void list_add_int( struct list_int **list_ppt, int val ); +void list_add_int( list_int **list_ppt, int val ); /* Reverse the order of elements in a singly linked list. */ -void list_reverse( void *old_list ); +/* Usage: list_reverse( &list ) */ +void list_reverse( void *list_pt ); /* Check if a given string exists in a singly linked list. */ -bool list_exists( struct list *list_pt, char *string ); +bool list_exists( list *list_pt, char *string ); /* Check if a given integer exists in a singly linked list. */ -bool list_exists_int( struct list_int *list_pt, int val ); +bool list_exists_int( list_int *list_pt, int val ); /* Free memory for all elements of a singly linked list. */ -void list_free( void *list_pt ); +void list_free( void *list_pt ); /* Debug function to print all elements from a singly linked list. */ -void list_print( struct list *list_pt ); +void list_print( void *list_pt ); + +/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DOUBLY LINKED LIST <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ + +/* Initialize a new doubly linked list. */ +void list_dl_new( list_dl **list_ppt ); + +/* Add a new node to the beginning of a doubly linked list. */ +void list_dl_add_beg( list_dl **list_ppt, node_dl **node_ppt ); + +/* Add a new node to the end of a doubly linked list. */ +void list_dl_add_end( list_dl **list_ppt, node_dl **node_ppt ); + +/* Add a new node before a given node of a doubly linked list. */ +void list_dl_add_before( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt ); + +/* Add a new node after a given node of a doubly linked list. */ +void list_dl_add_after( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt ); + +/* Remove a node from a doubly linked list. */ +void list_dl_remove( list_dl **list_ppt, node_dl **node_ppt ); + +/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ + + diff --git a/code_c/Maasha/src/inc/mem.h b/code_c/Maasha/src/inc/mem.h index 99c22ef..3c83c41 100644 --- a/code_c/Maasha/src/inc/mem.h +++ b/code_c/Maasha/src/inc/mem.h @@ -14,4 +14,5 @@ void *mem_resize_zero( void *pt, size_t old_size, size_t new_size ); void *mem_clone( void *old_pt, size_t size ); /* Free memory from a given pointer. */ -void mem_free( void **ppt ); +/* Usage: mem_free2( &pt ) */ +void mem_free( void *pt ); diff --git a/code_c/Maasha/src/lib/fasta.c b/code_c/Maasha/src/lib/fasta.c index 3f24693..50c0cbb 100644 --- a/code_c/Maasha/src/lib/fasta.c +++ b/code_c/Maasha/src/lib/fasta.c @@ -115,15 +115,15 @@ void fasta_put_entry( seq_entry *entry ) //} -void fasta_put_entries( struct list *entries ) -{ - /* Martin A. Hansen, May 2008 */ - - /* Output a list of sequence entries as FASTA records. */ - - struct list *elem; - - for ( elem = entries; elem != NULL; elem = elem->next ) { - fasta_put_entry( elem->val ); - } -} +//void fasta_put_entries( struct list *entries ) +//{ +// /* Martin A. Hansen, May 2008 */ +// +// /* Output a list of sequence entries as FASTA records. */ +// +// struct list *elem; +// +// for ( elem = entries; elem != NULL; elem = elem->next ) { +// fasta_put_entry( elem->val ); +// } +//} diff --git a/code_c/Maasha/src/lib/filesys.c b/code_c/Maasha/src/lib/filesys.c index 3c80226..4cbbec2 100644 --- a/code_c/Maasha/src/lib/filesys.c +++ b/code_c/Maasha/src/lib/filesys.c @@ -207,7 +207,7 @@ size_t buffer_read( file_buffer **buffer_ppt ) buffer->str[ new_end ] = '\0'; buffer->buffer_end = new_end; - mem_free( ( void * ) &str ); + mem_free( &str ); } buffer->eof = feof( buffer->fp ) ? TRUE : FALSE; @@ -414,8 +414,10 @@ void buffer_destroy( file_buffer **buffer_ppt ) close_stream( buffer->fp ); - mem_free( ( void * ) &buffer->str ); - mem_free( ( void * ) &buffer ); + mem_free( &buffer->str ); + mem_free( &buffer ); + + buffer = NULL; } diff --git a/code_c/Maasha/src/lib/hash.c b/code_c/Maasha/src/lib/hash.c index 51c7129..acdc991 100644 --- a/code_c/Maasha/src/lib/hash.c +++ b/code_c/Maasha/src/lib/hash.c @@ -142,9 +142,9 @@ void hash_destroy( struct hash *myhash ) { for ( bucket = myhash->table[ i ]; bucket != NULL; bucket = bucket->next ) { - mem_free( ( void * ) &bucket->key ); + mem_free( &bucket->key ); // mem_free( bucket->val ); - mem_free( ( void * ) &bucket ); + mem_free( &bucket ); } } diff --git a/code_c/Maasha/src/lib/list.c b/code_c/Maasha/src/lib/list.c index 9105d66..4525dd8 100644 --- a/code_c/Maasha/src/lib/list.c +++ b/code_c/Maasha/src/lib/list.c @@ -3,72 +3,75 @@ #include "list.h" -void list_add( struct list **list_ppt, void *val ) +void list_add( list **list_ppt, void *val ) { /* Martin A. Hansen, May 2008 */ /* Add a new singly linked list element with a pointer. */ - struct list *elem = NULL; + list *list_pt = *list_ppt; + list *new = NULL; - elem = mem_get( sizeof( elem ) ); + new = mem_get( sizeof( list ) ); - elem->val = val; - elem->next = *( list_ppt ); - *( list_ppt ) = ( elem ); + new->val = val; + new->next = list_pt; + + *list_ppt = new; } -void list_add_int( struct list_int **list_ppt, int val ) +void list_add_int( list_int **list_ppt, int val ) { /* Martin A. Hansen, May 2008 */ /* Add a new singly linked list element with a integer. */ - struct list_int *elem = NULL; + list_int *elem = NULL; - elem = mem_get( sizeof( elem ) ); + elem = mem_get( sizeof( list_int ) ); - elem->val = val; -// elem->next = *( list_ppt ); - *( list_ppt ) = ( elem ); + elem->val = val; + elem->next = *list_ppt; + *list_ppt = elem; } -void list_reverse( void *old_list ) +void list_reverse( void *list_pt ) { - /* Martin A. Hansen, May 2008 */ + /* Martin A. Hansen, August 2008 */ /* Reverse the order of elements in a singly linked list. */ - struct list **ppt = ( struct list ** ) old_list; - struct list *new_list = NULL; - struct list *elem; - struct list *next; + list **list_ppt = ( list ** ) list_pt; + list *pt = *list_ppt; + list *new = NULL; + list *next = NULL; + list *temp = NULL; - next = *ppt; + next = pt; while ( next != NULL ) { - elem = next; - next = elem->next; - elem->next = new_list; - new_list = elem; + temp = next; + next = temp->next; + temp->next = new; + new = temp; } - *ppt = new_list; + *list_ppt = new; } -bool list_exists( struct list *list_pt, char *string ) +bool list_exists( list *list_pt, char *string ) { /* Martin A. Hansen, June 2008 */ /* Check if a given string exists in a singly linked list. */ - struct list *elem; + list *elem = NULL; - elem = mem_get( sizeof( elem ) ); + elem = mem_get( sizeof( list ) ); for ( elem = list_pt; elem != NULL; elem = elem->next ) { @@ -81,17 +84,17 @@ bool list_exists( struct list *list_pt, char *string ) } -bool list_exists_int( struct list_int *list_pt, int val ) +bool list_exists_int( list_int *list_pt, int val ) { /* Martin A. Hansen, June 2008 */ /* Check if a given integer exists in a singly linked list. */ - struct list_int *elem; + list_int *elem = NULL; - elem = mem_get( sizeof( elem ) ); + elem = mem_get( sizeof( list_int ) ); -// for ( elem = list_pt; elem != NULL; elem = elem->next ) + for ( elem = list_pt; elem != NULL; elem = elem->next ) { if ( elem->val == val ) { return TRUE; @@ -108,36 +111,171 @@ void list_free( void *list_pt ) /* Free memory for all elements of a singly linked list. */ - struct list **ppt = ( struct list ** ) list_pt; - struct list *next = *ppt; - struct list *elem; + list **list_ppt = ( list ** ) list_pt; + list *next = *list_ppt; + list *elem = NULL; while ( next != NULL ) { elem = next; + +// printf( "elem->val: %s\n", ( char * ) elem->val ); + next = elem->next; - mem_free( ( void * ) &elem ); +// mem_free( &elem ); + free( elem ); } - ppt = NULL; + *list_ppt = NULL; } -void list_print( struct list *list_pt ) +void list_print( void *list_pt ) { /* Martin A. Hansen, June 2008 */ /* Debug function to print all elements from a singly linked list. */ - int i = 0; + list *first = ( list * ) list_pt; + list *elem = NULL; + int i = 0; - struct list *elem; - - for ( elem = list_pt; elem != NULL; elem = elem->next ) + for ( elem = first; elem != NULL; elem = elem->next ) { - printf( "elem %d: ->%s<-\n", i, ( char * ) elem->val ); + printf( "Elem %d: ->%s<-\n", i, ( char * ) elem->val ); i++; } } + +void list_dl_new( list_dl **list_ppt ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Initialize a new doubly linked list. */ + + list_dl *new = NULL; + + new = mem_get( sizeof( list_dl ) ); + + new->first = NULL; + new->last = NULL; + + *list_ppt = new; +} + + +void list_dl_add_beg( list_dl **list_ppt, node_dl **node_ppt ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Add a new node to the beginning of a doubly linked list. */ + + list_dl *list_pt = *list_ppt; + node_dl *node_pt = *node_ppt; + + if ( list_pt->first == NULL ) + { + list_pt->first = node_pt; + list_pt->last = node_pt; + node_pt->next = NULL; + node_pt->prev = NULL; + } + else + { + list_dl_add_before( &list_pt, &list_pt->first, &node_pt ); + } +} + + +void list_dl_add_end( list_dl **list_ppt, node_dl **node_ppt ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Add a new node to the end of a doubly linked list. */ + + list_dl *list_pt = *list_ppt; + node_dl *node_pt = *node_ppt; + + if ( list_pt->last == NULL ) { + list_dl_add_beg( &list_pt, &node_pt ); + } else { + list_dl_add_after( &list_pt, &list_pt->last, &node_pt ); + } +} + + +void list_dl_add_before( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Add a new node before a given node of a doubly linked list. */ + + list_dl *list_pt = *list_ppt; + node_dl *node_pt = *node_ppt; + node_dl *new_node_pt = *new_node_ppt; + + new_node_pt->prev = node_pt->prev; + new_node_pt->next = node_pt; + + if ( node_pt->prev == NULL ) { + list_pt->first = new_node_pt; + } else { + node_pt->prev->next = new_node_pt; + } + + node_pt->prev = new_node_pt; +} + + +void list_dl_add_after( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Add a new node after a given node of a doubly linked list. */ + + list_dl *list_pt = *list_ppt; + node_dl *node_pt = *node_ppt; + node_dl *new_node_pt = *new_node_ppt; + + new_node_pt->prev = node_pt; + new_node_pt->next = node_pt->next; + + if ( node_pt->next == NULL ) { + list_pt->last = new_node_pt; + } else { + node_pt->next->prev = new_node_pt; + } + + node_pt->next = new_node_pt; +} + + +void list_dl_remove( list_dl **list_ppt, node_dl **node_ppt ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Remove a node from a doubly linked list. */ + + list_dl *list_pt = *list_ppt; + node_dl *node_pt = *node_ppt; + + if ( node_pt->prev == NULL ) { + list_pt->first = node_pt->next; + } else { + node_pt->prev->next = node_pt->next; + } + + if ( node_pt->next == NULL ) { + list_pt->last = node_pt->prev; + } else { + node_pt->next->prev = node_pt->prev; + } + + mem_free( &node_pt ); + +// *node_ppt = NULL; +} + + diff --git a/code_c/Maasha/src/lib/mem.c b/code_c/Maasha/src/lib/mem.c index 03f49d5..809a038 100644 --- a/code_c/Maasha/src/lib/mem.c +++ b/code_c/Maasha/src/lib/mem.c @@ -106,7 +106,7 @@ void *mem_clone( void *old_pt, size_t size ) } -void mem_free( void **ppt ) +void mem_free( void *pt ) { /* Martin A. Hansen, May 2008 */ @@ -114,7 +114,15 @@ void mem_free( void **ppt ) /* Free memory from a given pointer. */ - free( *ppt ); + void **ppt = ( void ** ) pt; + void *p = *ppt; + + if ( p != NULL ) + { + free( p ); + + p = NULL; + } *ppt = NULL; } diff --git a/code_c/Maasha/src/test/Makefile b/code_c/Maasha/src/test/Makefile index 38dbcb1..3d1cbab 100644 --- a/code_c/Maasha/src/test/Makefile +++ b/code_c/Maasha/src/test/Makefile @@ -9,7 +9,7 @@ LIB = -lm $(LIB_DIR)*.o all: test -test: test_common test_fasta test_filesys test_mem test_seq test_strings +test: test_common test_fasta test_filesys test_list test_mem test_seq test_strings test_common: test_common.c $(LIB_DIR)common.c $(CC) $(Cflags) $(INC) $(LIB) test_common.c -o test_common @@ -20,6 +20,9 @@ test_fasta: test_fasta.c $(LIB_DIR)fasta.c test_filesys: test_filesys.c $(LIB_DIR)filesys.c $(CC) $(Cflags) $(INC) $(LIB) test_filesys.c -o test_filesys +test_list: test_list.c $(LIB_DIR)list.c + $(CC) $(Cflags) $(INC) $(LIB) test_list.c -o test_list + test_mem: test_mem.c $(LIB_DIR)mem.c $(CC) $(Cflags) $(INC) $(LIB) test_mem.c -o test_mem @@ -33,6 +36,7 @@ clean: rm test_common rm test_fasta rm test_filesys + rm test_list rm test_mem rm test_seq rm test_strings diff --git a/code_c/Maasha/src/test/test_fasta.c b/code_c/Maasha/src/test/test_fasta.c index 25e193e..c7de1dd 100644 --- a/code_c/Maasha/src/test/test_fasta.c +++ b/code_c/Maasha/src/test/test_fasta.c @@ -35,12 +35,12 @@ void test_fasta_get_entry() size_t max_seq_name = MAX_SEQ_NAME; size_t max_seq = MAX_SEQ; - fp = read_open( TEST_FILE2 ); + fp = read_open( TEST_FILE1 ); seq_new( &entry, max_seq_name, max_seq ); while ( ( fasta_get_entry( fp, &entry ) != FALSE ) ) { - printf( "seq_name: %s seq_len: %zu\n", entry->seq_name, entry->seq_len ); +// printf( "seq_name: %s seq_len: %zu\n", entry->seq_name, entry->seq_len ); } seq_destroy( entry ); @@ -65,7 +65,7 @@ void test_fasta_put_entry() seq_new( &entry, max_seq_name, max_seq ); while ( ( fasta_get_entry( fp, &entry ) != FALSE ) ) { - fasta_put_entry( entry ); +// fasta_put_entry( entry ); } seq_destroy( entry ); diff --git a/code_c/Maasha/src/test/test_filesys.c b/code_c/Maasha/src/test/test_filesys.c index f3c3878..c75c6d4 100644 --- a/code_c/Maasha/src/test/test_filesys.c +++ b/code_c/Maasha/src/test/test_filesys.c @@ -598,12 +598,6 @@ void test_buffer_destroy() buffer_destroy( &buffer ); - assert( buffer->str == NULL ); - - buffer = NULL; - - assert( buffer == NULL ); - file_unlink( file ); fprintf( stderr, "OK\n" ); diff --git a/code_c/Maasha/src/test/test_mem.c b/code_c/Maasha/src/test/test_mem.c index faa6a91..7327587 100644 --- a/code_c/Maasha/src/test/test_mem.c +++ b/code_c/Maasha/src/test/test_mem.c @@ -139,29 +139,24 @@ void test_mem_free() { fprintf( stderr, " Testing mem_free ... " ); - char *pt0 = "foo"; - void *pt1 = NULL; - char *pt2 = NULL; - - assert( pt0 != NULL ); - assert( pt1 == NULL ); - assert( pt2 == NULL ); + uint mem = 500000000; + int i = 0; + char *pt = NULL; - pt1 = mem_get( 100 ); - pt2 = mem_get( 100 ); + pt = mem_get( mem ); - assert( pt1 != NULL ); - assert( pt2 != NULL ); + for ( i = 0; i < mem; i++ ) + { + + } - memcpy( pt2, pt0, 3 ); + assert( pt != NULL ); - mem_free( &pt1 ); - mem_free( &pt1 ); - mem_free( ( void * ) &pt1 ); - mem_free( ( void * ) &pt2 ); + mem_free( &pt ); - assert( pt1 == NULL ); - assert( pt2 == NULL ); + assert( pt == NULL ); fprintf( stderr, "OK\n" ); } + + diff --git a/code_c/Maasha/src/testall.pl b/code_c/Maasha/src/testall.pl index 425e8e7..4c9d2f1 100755 --- a/code_c/Maasha/src/testall.pl +++ b/code_c/Maasha/src/testall.pl @@ -11,6 +11,7 @@ $test_dir = "test"; test_common test_fasta test_filesys + test_list test_mem test_strings ); diff --git a/code_c/Maasha/src/tetra_count.c b/code_c/Maasha/src/tetra_count.c index c47b08b..5d7a481 100644 --- a/code_c/Maasha/src/tetra_count.c +++ b/code_c/Maasha/src/tetra_count.c @@ -4,37 +4,38 @@ #include "seq.h" #include "fasta.h" -#define BLOCK32 15 -#define ARRAY_SIZE ( 1 << ( BLOCK32 * 2 ) ) +#define BITS32 15 +#define ARRAY_SIZE ( 1 << ( BITS32 * 2 ) ) #define BLOCK_SIZE 4 #define MAX_SPACE 256 -#define T 3 /* 11 on the rightmost two bits of bin. */ -#define C 1 /* 01 on the rightmost two bits of bin. */ -#define G 2 /* 10 on the rightmost two bits of bin. */ +#define add_A( c ) /* add 00 on the rightmost two bits of c (i. e. do nothing). */ +#define add_T( c ) ( c |= 3 ) /* add 11 on the rightmost two bits of c. */ +#define add_C( c ) ( c |= 1 ) /* add 01 on the rightmost two bits of c. */ +#define add_G( c ) ( c |= 2 ) /* add 10 on the rightmost two bits of c. */ + +typedef uint bits32; uint mask_create( int size ); -void block_count( char *path, uint **array_ppt, seq_entry **entry_ppt, uint mask ); +void block_count( char *path, uint **bit32_array_ppt, seq_entry **entry_ppt, uint mask ); int main( int argc, char *argv[] ) { - uint mask = 0; - int i = 0; - uint *array = NULL; - seq_entry *entry = NULL; + uint mask = 0; + int i = 0; + uint *bit32_array = NULL; + seq_entry *entry = NULL; - mask = mask_create( BLOCK_SIZE ); + mask = mask_create( BLOCK_SIZE ); - array = mem_get_zero( sizeof( uint ) * ARRAY_SIZE ); + bit32_array = mem_get_zero( sizeof( uint ) * ARRAY_SIZE ); seq_new( &entry, MAX_SEQ_NAME, MAX_SEQ ); - printf( "mask: %d\n", mask ); - for ( i = 1; i < argc; i++ ) { - block_count( argv[ i ], &array, &entry, mask ); + block_count( argv[ i ], &bit32_array, &entry, mask ); } return EXIT_SUCCESS; @@ -61,18 +62,19 @@ uint mask_create( int size ) } -void block_count( char *path, uint **array_ppt, seq_entry **entry_ppt, uint mask ) +void block_count( char *path, uint **bit32_array_ppt, seq_entry **entry_ppt, uint mask ) { /* Martin A. Hansen, August 2008 */ - /* Scan a FASTA file for ... */ + /* Scan a FASTA file for bi-partive motifs consisting of two */ + /* blocks of BLOCKS_SIZE separated by up to MAX_SPACE space. */ - uint *array = *array_ppt; - seq_entry *entry = *entry_ppt; - FILE *fp = NULL; - uint i = 0; - uint j = 0; - uint bin1 = 0; + uint *bit32_array = *bit32_array_ppt; + seq_entry *entry = *entry_ppt; + FILE *fp = NULL; + uint i = 0; + uint j = 0; + uint bin = 0; fp = read_open( path ); @@ -82,25 +84,25 @@ void block_count( char *path, uint **array_ppt, seq_entry **entry_ppt, uint mask { fprintf( stderr, " Counting blocks in: %s ... ", entry->seq_name ); - bin1 = 0; - j = 0; + bin = 0; + j = 0; for ( i = 0; entry->seq[ i ]; i++ ) { - bin1 <<= 2; + bin <<= 2; switch( entry->seq[ i ] ) { - case 'A': case 'a': j++; break; - case 'T': case 't': bin1 |= T; j++; break; - case 'C': case 'c': bin1 |= C; j++; break; - case 'G': case 'g': bin1 |= G; j++; break; - default: bin1 = 0; j = 0; break; + case 'A': case 'a': add_A( bin ); j++; break; + case 'T': case 't': add_T( bin ); j++; break; + case 'C': case 'c': add_C( bin ); j++; break; + case 'G': case 'g': add_G( bin ); j++; break; + default: bin = 0; j = 0; break; } - if ( j >= BLOCK32 ) + if ( j >= BLOCK_SIZE - 1 ) { - array[ ( bin1 & mask ) ]++; + bit32_array[ ( bin & mask ) ]++; /* printf( "\n" ); printf( "mask : %s\n", bits2string( mask ) ); @@ -117,3 +119,5 @@ void block_count( char *path, uint **array_ppt, seq_entry **entry_ppt, uint mask close_stream( fp ); } + + diff --git a/code_perl/Maasha/Fasta.pm b/code_perl/Maasha/Fasta.pm index 82307f9..0689de7 100644 --- a/code_perl/Maasha/Fasta.pm +++ b/code_perl/Maasha/Fasta.pm @@ -174,7 +174,8 @@ sub get_entry return if not defined $block; - $block =~ />?([^\n]+)\n/m; + # $block =~ />?([^\n]+)\n/m; + $block =~ /^>?(.+)(\n|\r)/m; $seq_name = $1; $seq = $'; @@ -182,7 +183,7 @@ sub get_entry chomp $seq; - $seq =~ tr/ \t\n//d; + $seq =~ tr/ \t\n\r//d; $entry = [ $seq_name, $seq ]; -- 2.39.5