#define die assert( DEBUG_EXIT )
-/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
-
-
-// At some point test if typdef struct list list will allow us to move all this stuff to list.h and list.c
-
-
-/* Singly linked list with a pointer to the next element and a pointer to a value. */
-struct list
-{
- struct list *next;
- void *val;
-};
-
-/* Singly linked list with a pointer to the next element and an integer value. */
-struct list_int
-{
- struct list *next;
- int val;
-};
-
-
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
void fasta_put_entry( seq_entry *entry );
/* Get all sequence entries from a FASTA file in a list. */
-void fasta_get_entries( FILE *fp, struct list **entries );
+//void fasta_get_entries( FILE *fp, struct list **entries );
/* Output all sequence entries from a list in FASTA format. */
-void fasta_put_entries( struct list *entries );
+//void fasta_put_entries( struct list *entries );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Singly linked list with a pointer to the next element and a pointer to a value. */
+struct _list
+{
+ struct _list *next;
+ void *val;
+};
+
+typedef struct _list list;
+
+/* Singly linked list with a pointer to the next element and an integer value. */
+struct _list_int
+{
+ struct _list_int *next;
+ int val;
+};
+
+typedef struct _list_int list_int;
+
+/* Doubly linked list node. */
+struct _node_dl
+{
+ struct _node_dl *next; /* Pointer to next node - NULL if last. */
+ struct _node_dl *prev; /* Pointer to previous node - NULL if last. */
+ void *val; /* Pointer to data value. */
+};
+
+typedef struct _node_dl node_dl;
+
+/* Doubly linked list. */
+struct _list_dl
+{
+ node_dl *first; /* Pointer to first node - NULL for empty list. */
+ node_dl *last; /* Pointer to last node - NULL for empty list. */
+};
+
+typedef struct _list_dl list_dl;
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FUNCTION DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SINGLY LINLED LIST <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
/* Add a new singly linked list element with a pointer. */
-void list_add( struct list **list_ppt, void *val );
+void list_add( list **list_ppt, void *val );
/* Add a new singly linked list element with an integer. */
-void list_add_int( struct list_int **list_ppt, int val );
+void list_add_int( list_int **list_ppt, int val );
/* Reverse the order of elements in a singly linked list. */
-void list_reverse( void *old_list );
+/* Usage: list_reverse( &list ) */
+void list_reverse( void *list_pt );
/* Check if a given string exists in a singly linked list. */
-bool list_exists( struct list *list_pt, char *string );
+bool list_exists( list *list_pt, char *string );
/* Check if a given integer exists in a singly linked list. */
-bool list_exists_int( struct list_int *list_pt, int val );
+bool list_exists_int( list_int *list_pt, int val );
/* Free memory for all elements of a singly linked list. */
-void list_free( void *list_pt );
+void list_free( void *list_pt );
/* Debug function to print all elements from a singly linked list. */
-void list_print( struct list *list_pt );
+void list_print( void *list_pt );
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DOUBLY LINKED LIST <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+/* Initialize a new doubly linked list. */
+void list_dl_new( list_dl **list_ppt );
+
+/* Add a new node to the beginning of a doubly linked list. */
+void list_dl_add_beg( list_dl **list_ppt, node_dl **node_ppt );
+
+/* Add a new node to the end of a doubly linked list. */
+void list_dl_add_end( list_dl **list_ppt, node_dl **node_ppt );
+
+/* Add a new node before a given node of a doubly linked list. */
+void list_dl_add_before( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt );
+
+/* Add a new node after a given node of a doubly linked list. */
+void list_dl_add_after( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt );
+
+/* Remove a node from a doubly linked list. */
+void list_dl_remove( list_dl **list_ppt, node_dl **node_ppt );
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
void *mem_clone( void *old_pt, size_t size );
/* Free memory from a given pointer. */
-void mem_free( void **ppt );
+/* Usage: mem_free2( &pt ) */
+void mem_free( void *pt );
//}
-void fasta_put_entries( struct list *entries )
-{
- /* Martin A. Hansen, May 2008 */
-
- /* Output a list of sequence entries as FASTA records. */
-
- struct list *elem;
-
- for ( elem = entries; elem != NULL; elem = elem->next ) {
- fasta_put_entry( elem->val );
- }
-}
+//void fasta_put_entries( struct list *entries )
+//{
+// /* Martin A. Hansen, May 2008 */
+//
+// /* Output a list of sequence entries as FASTA records. */
+//
+// struct list *elem;
+//
+// for ( elem = entries; elem != NULL; elem = elem->next ) {
+// fasta_put_entry( elem->val );
+// }
+//}
buffer->str[ new_end ] = '\0';
buffer->buffer_end = new_end;
- mem_free( ( void * ) &str );
+ mem_free( &str );
}
buffer->eof = feof( buffer->fp ) ? TRUE : FALSE;
close_stream( buffer->fp );
- mem_free( ( void * ) &buffer->str );
- mem_free( ( void * ) &buffer );
+ mem_free( &buffer->str );
+ mem_free( &buffer );
+
+ buffer = NULL;
}
{
for ( bucket = myhash->table[ i ]; bucket != NULL; bucket = bucket->next )
{
- mem_free( ( void * ) &bucket->key );
+ mem_free( &bucket->key );
// mem_free( bucket->val );
- mem_free( ( void * ) &bucket );
+ mem_free( &bucket );
}
}
#include "list.h"
-void list_add( struct list **list_ppt, void *val )
+void list_add( list **list_ppt, void *val )
{
/* Martin A. Hansen, May 2008 */
/* Add a new singly linked list element with a pointer. */
- struct list *elem = NULL;
+ list *list_pt = *list_ppt;
+ list *new = NULL;
- elem = mem_get( sizeof( elem ) );
+ new = mem_get( sizeof( list ) );
- elem->val = val;
- elem->next = *( list_ppt );
- *( list_ppt ) = ( elem );
+ new->val = val;
+ new->next = list_pt;
+
+ *list_ppt = new;
}
-void list_add_int( struct list_int **list_ppt, int val )
+void list_add_int( list_int **list_ppt, int val )
{
/* Martin A. Hansen, May 2008 */
/* Add a new singly linked list element with a integer. */
- struct list_int *elem = NULL;
+ list_int *elem = NULL;
- elem = mem_get( sizeof( elem ) );
+ elem = mem_get( sizeof( list_int ) );
- elem->val = val;
-// elem->next = *( list_ppt );
- *( list_ppt ) = ( elem );
+ elem->val = val;
+ elem->next = *list_ppt;
+ *list_ppt = elem;
}
-void list_reverse( void *old_list )
+void list_reverse( void *list_pt )
{
- /* Martin A. Hansen, May 2008 */
+ /* Martin A. Hansen, August 2008 */
/* Reverse the order of elements in a singly linked list. */
- struct list **ppt = ( struct list ** ) old_list;
- struct list *new_list = NULL;
- struct list *elem;
- struct list *next;
+ list **list_ppt = ( list ** ) list_pt;
+ list *pt = *list_ppt;
+ list *new = NULL;
+ list *next = NULL;
+ list *temp = NULL;
- next = *ppt;
+ next = pt;
while ( next != NULL )
{
- elem = next;
- next = elem->next;
- elem->next = new_list;
- new_list = elem;
+ temp = next;
+ next = temp->next;
+ temp->next = new;
+ new = temp;
}
- *ppt = new_list;
+ *list_ppt = new;
}
-bool list_exists( struct list *list_pt, char *string )
+bool list_exists( list *list_pt, char *string )
{
/* Martin A. Hansen, June 2008 */
/* Check if a given string exists in a singly linked list. */
- struct list *elem;
+ list *elem = NULL;
- elem = mem_get( sizeof( elem ) );
+ elem = mem_get( sizeof( list ) );
for ( elem = list_pt; elem != NULL; elem = elem->next )
{
}
-bool list_exists_int( struct list_int *list_pt, int val )
+bool list_exists_int( list_int *list_pt, int val )
{
/* Martin A. Hansen, June 2008 */
/* Check if a given integer exists in a singly linked list. */
- struct list_int *elem;
+ list_int *elem = NULL;
- elem = mem_get( sizeof( elem ) );
+ elem = mem_get( sizeof( list_int ) );
-// for ( elem = list_pt; elem != NULL; elem = elem->next )
+ for ( elem = list_pt; elem != NULL; elem = elem->next )
{
if ( elem->val == val ) {
return TRUE;
/* Free memory for all elements of a singly linked list. */
- struct list **ppt = ( struct list ** ) list_pt;
- struct list *next = *ppt;
- struct list *elem;
+ list **list_ppt = ( list ** ) list_pt;
+ list *next = *list_ppt;
+ list *elem = NULL;
while ( next != NULL )
{
elem = next;
+
+// printf( "elem->val: %s\n", ( char * ) elem->val );
+
next = elem->next;
- mem_free( ( void * ) &elem );
+// mem_free( &elem );
+ free( elem );
}
- ppt = NULL;
+ *list_ppt = NULL;
}
-void list_print( struct list *list_pt )
+void list_print( void *list_pt )
{
/* Martin A. Hansen, June 2008 */
/* Debug function to print all elements from a singly linked list. */
- int i = 0;
+ list *first = ( list * ) list_pt;
+ list *elem = NULL;
+ int i = 0;
- struct list *elem;
-
- for ( elem = list_pt; elem != NULL; elem = elem->next )
+ for ( elem = first; elem != NULL; elem = elem->next )
{
- printf( "elem %d: ->%s<-\n", i, ( char * ) elem->val );
+ printf( "Elem %d: ->%s<-\n", i, ( char * ) elem->val );
i++;
}
}
+
+void list_dl_new( list_dl **list_ppt )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Initialize a new doubly linked list. */
+
+ list_dl *new = NULL;
+
+ new = mem_get( sizeof( list_dl ) );
+
+ new->first = NULL;
+ new->last = NULL;
+
+ *list_ppt = new;
+}
+
+
+void list_dl_add_beg( list_dl **list_ppt, node_dl **node_ppt )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Add a new node to the beginning of a doubly linked list. */
+
+ list_dl *list_pt = *list_ppt;
+ node_dl *node_pt = *node_ppt;
+
+ if ( list_pt->first == NULL )
+ {
+ list_pt->first = node_pt;
+ list_pt->last = node_pt;
+ node_pt->next = NULL;
+ node_pt->prev = NULL;
+ }
+ else
+ {
+ list_dl_add_before( &list_pt, &list_pt->first, &node_pt );
+ }
+}
+
+
+void list_dl_add_end( list_dl **list_ppt, node_dl **node_ppt )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Add a new node to the end of a doubly linked list. */
+
+ list_dl *list_pt = *list_ppt;
+ node_dl *node_pt = *node_ppt;
+
+ if ( list_pt->last == NULL ) {
+ list_dl_add_beg( &list_pt, &node_pt );
+ } else {
+ list_dl_add_after( &list_pt, &list_pt->last, &node_pt );
+ }
+}
+
+
+void list_dl_add_before( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Add a new node before a given node of a doubly linked list. */
+
+ list_dl *list_pt = *list_ppt;
+ node_dl *node_pt = *node_ppt;
+ node_dl *new_node_pt = *new_node_ppt;
+
+ new_node_pt->prev = node_pt->prev;
+ new_node_pt->next = node_pt;
+
+ if ( node_pt->prev == NULL ) {
+ list_pt->first = new_node_pt;
+ } else {
+ node_pt->prev->next = new_node_pt;
+ }
+
+ node_pt->prev = new_node_pt;
+}
+
+
+void list_dl_add_after( list_dl **list_ppt, node_dl **node_ppt, node_dl **new_node_ppt )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Add a new node after a given node of a doubly linked list. */
+
+ list_dl *list_pt = *list_ppt;
+ node_dl *node_pt = *node_ppt;
+ node_dl *new_node_pt = *new_node_ppt;
+
+ new_node_pt->prev = node_pt;
+ new_node_pt->next = node_pt->next;
+
+ if ( node_pt->next == NULL ) {
+ list_pt->last = new_node_pt;
+ } else {
+ node_pt->next->prev = new_node_pt;
+ }
+
+ node_pt->next = new_node_pt;
+}
+
+
+void list_dl_remove( list_dl **list_ppt, node_dl **node_ppt )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Remove a node from a doubly linked list. */
+
+ list_dl *list_pt = *list_ppt;
+ node_dl *node_pt = *node_ppt;
+
+ if ( node_pt->prev == NULL ) {
+ list_pt->first = node_pt->next;
+ } else {
+ node_pt->prev->next = node_pt->next;
+ }
+
+ if ( node_pt->next == NULL ) {
+ list_pt->last = node_pt->prev;
+ } else {
+ node_pt->next->prev = node_pt->prev;
+ }
+
+ mem_free( &node_pt );
+
+// *node_ppt = NULL;
+}
+
+
}
-void mem_free( void **ppt )
+void mem_free( void *pt )
{
/* Martin A. Hansen, May 2008 */
/* Free memory from a given pointer. */
- free( *ppt );
+ void **ppt = ( void ** ) pt;
+ void *p = *ppt;
+
+ if ( p != NULL )
+ {
+ free( p );
+
+ p = NULL;
+ }
*ppt = NULL;
}
all: test
-test: test_common test_fasta test_filesys test_mem test_seq test_strings
+test: test_common test_fasta test_filesys test_list test_mem test_seq test_strings
test_common: test_common.c $(LIB_DIR)common.c
$(CC) $(Cflags) $(INC) $(LIB) test_common.c -o test_common
test_filesys: test_filesys.c $(LIB_DIR)filesys.c
$(CC) $(Cflags) $(INC) $(LIB) test_filesys.c -o test_filesys
+test_list: test_list.c $(LIB_DIR)list.c
+ $(CC) $(Cflags) $(INC) $(LIB) test_list.c -o test_list
+
test_mem: test_mem.c $(LIB_DIR)mem.c
$(CC) $(Cflags) $(INC) $(LIB) test_mem.c -o test_mem
rm test_common
rm test_fasta
rm test_filesys
+ rm test_list
rm test_mem
rm test_seq
rm test_strings
size_t max_seq_name = MAX_SEQ_NAME;
size_t max_seq = MAX_SEQ;
- fp = read_open( TEST_FILE2 );
+ fp = read_open( TEST_FILE1 );
seq_new( &entry, max_seq_name, max_seq );
while ( ( fasta_get_entry( fp, &entry ) != FALSE ) ) {
- printf( "seq_name: %s seq_len: %zu\n", entry->seq_name, entry->seq_len );
+// printf( "seq_name: %s seq_len: %zu\n", entry->seq_name, entry->seq_len );
}
seq_destroy( entry );
seq_new( &entry, max_seq_name, max_seq );
while ( ( fasta_get_entry( fp, &entry ) != FALSE ) ) {
- fasta_put_entry( entry );
+// fasta_put_entry( entry );
}
seq_destroy( entry );
buffer_destroy( &buffer );
- assert( buffer->str == NULL );
-
- buffer = NULL;
-
- assert( buffer == NULL );
-
file_unlink( file );
fprintf( stderr, "OK\n" );
{
fprintf( stderr, " Testing mem_free ... " );
- char *pt0 = "foo";
- void *pt1 = NULL;
- char *pt2 = NULL;
-
- assert( pt0 != NULL );
- assert( pt1 == NULL );
- assert( pt2 == NULL );
+ uint mem = 500000000;
+ int i = 0;
+ char *pt = NULL;
- pt1 = mem_get( 100 );
- pt2 = mem_get( 100 );
+ pt = mem_get( mem );
- assert( pt1 != NULL );
- assert( pt2 != NULL );
+ for ( i = 0; i < mem; i++ )
+ {
+
+ }
- memcpy( pt2, pt0, 3 );
+ assert( pt != NULL );
- mem_free( &pt1 );
- mem_free( &pt1 );
- mem_free( ( void * ) &pt1 );
- mem_free( ( void * ) &pt2 );
+ mem_free( &pt );
- assert( pt1 == NULL );
- assert( pt2 == NULL );
+ assert( pt == NULL );
fprintf( stderr, "OK\n" );
}
+
+
test_common
test_fasta
test_filesys
+ test_list
test_mem
test_strings
);
#include "seq.h"
#include "fasta.h"
-#define BLOCK32 15
-#define ARRAY_SIZE ( 1 << ( BLOCK32 * 2 ) )
+#define BITS32 15
+#define ARRAY_SIZE ( 1 << ( BITS32 * 2 ) )
#define BLOCK_SIZE 4
#define MAX_SPACE 256
-#define T 3 /* 11 on the rightmost two bits of bin. */
-#define C 1 /* 01 on the rightmost two bits of bin. */
-#define G 2 /* 10 on the rightmost two bits of bin. */
+#define add_A( c ) /* add 00 on the rightmost two bits of c (i. e. do nothing). */
+#define add_T( c ) ( c |= 3 ) /* add 11 on the rightmost two bits of c. */
+#define add_C( c ) ( c |= 1 ) /* add 01 on the rightmost two bits of c. */
+#define add_G( c ) ( c |= 2 ) /* add 10 on the rightmost two bits of c. */
+
+typedef uint bits32;
uint mask_create( int size );
-void block_count( char *path, uint **array_ppt, seq_entry **entry_ppt, uint mask );
+void block_count( char *path, uint **bit32_array_ppt, seq_entry **entry_ppt, uint mask );
int main( int argc, char *argv[] )
{
- uint mask = 0;
- int i = 0;
- uint *array = NULL;
- seq_entry *entry = NULL;
+ uint mask = 0;
+ int i = 0;
+ uint *bit32_array = NULL;
+ seq_entry *entry = NULL;
- mask = mask_create( BLOCK_SIZE );
+ mask = mask_create( BLOCK_SIZE );
- array = mem_get_zero( sizeof( uint ) * ARRAY_SIZE );
+ bit32_array = mem_get_zero( sizeof( uint ) * ARRAY_SIZE );
seq_new( &entry, MAX_SEQ_NAME, MAX_SEQ );
- printf( "mask: %d\n", mask );
-
for ( i = 1; i < argc; i++ ) {
- block_count( argv[ i ], &array, &entry, mask );
+ block_count( argv[ i ], &bit32_array, &entry, mask );
}
return EXIT_SUCCESS;
}
-void block_count( char *path, uint **array_ppt, seq_entry **entry_ppt, uint mask )
+void block_count( char *path, uint **bit32_array_ppt, seq_entry **entry_ppt, uint mask )
{
/* Martin A. Hansen, August 2008 */
- /* Scan a FASTA file for ... */
+ /* Scan a FASTA file for bi-partive motifs consisting of two */
+ /* blocks of BLOCKS_SIZE separated by up to MAX_SPACE space. */
- uint *array = *array_ppt;
- seq_entry *entry = *entry_ppt;
- FILE *fp = NULL;
- uint i = 0;
- uint j = 0;
- uint bin1 = 0;
+ uint *bit32_array = *bit32_array_ppt;
+ seq_entry *entry = *entry_ppt;
+ FILE *fp = NULL;
+ uint i = 0;
+ uint j = 0;
+ uint bin = 0;
fp = read_open( path );
{
fprintf( stderr, " Counting blocks in: %s ... ", entry->seq_name );
- bin1 = 0;
- j = 0;
+ bin = 0;
+ j = 0;
for ( i = 0; entry->seq[ i ]; i++ )
{
- bin1 <<= 2;
+ bin <<= 2;
switch( entry->seq[ i ] )
{
- case 'A': case 'a': j++; break;
- case 'T': case 't': bin1 |= T; j++; break;
- case 'C': case 'c': bin1 |= C; j++; break;
- case 'G': case 'g': bin1 |= G; j++; break;
- default: bin1 = 0; j = 0; break;
+ case 'A': case 'a': add_A( bin ); j++; break;
+ case 'T': case 't': add_T( bin ); j++; break;
+ case 'C': case 'c': add_C( bin ); j++; break;
+ case 'G': case 'g': add_G( bin ); j++; break;
+ default: bin = 0; j = 0; break;
}
- if ( j >= BLOCK32 )
+ if ( j >= BLOCK_SIZE - 1 )
{
- array[ ( bin1 & mask ) ]++;
+ bit32_array[ ( bin & mask ) ]++;
/*
printf( "\n" );
printf( "mask : %s\n", bits2string( mask ) );
close_stream( fp );
}
+
+
return if not defined $block;
- $block =~ />?([^\n]+)\n/m;
+ # $block =~ />?([^\n]+)\n/m;
+ $block =~ /^>?(.+)(\n|\r)/m;
$seq_name = $1;
$seq = $';
chomp $seq;
- $seq =~ tr/ \t\n//d;
+ $seq =~ tr/ \t\n\r//d;
$entry = [ $seq_name, $seq ];