From 74a45560176acfe281965d0310d09c309b1b8316 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 18 Aug 2008 09:02:40 +0000 Subject: [PATCH] another c code upgrade git-svn-id: http://biopieces.googlecode.com/svn/trunk@210 74ccb610-7750-0410-82ae-013aeee3265d --- code_c/Maasha/src/inc/common.h | 19 +- code_c/Maasha/src/inc/fasta.h | 8 +- code_c/Maasha/src/inc/filesys.h | 44 ++- code_c/Maasha/src/lib/common.c | 25 -- code_c/Maasha/src/lib/fasta.c | 267 +++++++++++----- code_c/Maasha/src/lib/filesys.c | 160 +++++++--- code_c/Maasha/src/lib/seq.c | 8 +- code_c/Maasha/src/lib/ucsc.c | 6 +- code_c/Maasha/src/repeat-O-matic.c | 228 +++++++------- code_c/Maasha/src/test/Makefile | 6 +- code_c/Maasha/src/test/test_common.c | 49 +++ code_c/Maasha/src/test/test_fasta.c | 27 +- code_c/Maasha/src/test/test_filesys.c | 420 +++++++++++++++++++++++++- code_c/Maasha/src/test_all.pl | 1 + 14 files changed, 954 insertions(+), 314 deletions(-) create mode 100644 code_c/Maasha/src/test/test_common.c diff --git a/code_c/Maasha/src/inc/common.h b/code_c/Maasha/src/inc/common.h index 362fd6b..3414320 100644 --- a/code_c/Maasha/src/inc/common.h +++ b/code_c/Maasha/src/inc/common.h @@ -9,11 +9,8 @@ #include #include -/* Define a shorthand for unsigned int */ -//typedef uint unsigned int +typedef char bool; -/* Define a boolean type */ -#define bool char #define TRUE 1 #define FALSE 0 @@ -25,6 +22,10 @@ #define ABS( x ) ( ( x ) < 0 ) ? -( x ) : ( x ) #define INT( x ) ( int ) x +/* Neat debug macro. */ +#define DEBUG_EXIT 0 +#define die assert( DEBUG_EXIT ) + /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ @@ -47,16 +48,6 @@ struct list_int }; -/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ERROR HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ - - -/* Print error message to stderr and exit. */ -void die( char *error_msg ); - -/* Print warning message to stderr. */ -void warn( char *warn_msg ); - - /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ diff --git a/code_c/Maasha/src/inc/fasta.h b/code_c/Maasha/src/inc/fasta.h index 671c449..fac3023 100644 --- a/code_c/Maasha/src/inc/fasta.h +++ b/code_c/Maasha/src/inc/fasta.h @@ -1,20 +1,20 @@ #define FASTA_BUFFER 256 * 1024 /* Structure of a sequence entry. */ -struct seq_entry +struct _seq_entry { char *seq_name; char *seq; size_t seq_len; }; -typedef struct seq_entry seq_entry; +typedef struct _seq_entry seq_entry; /* Count all entries in a FASTA file given a file pointer. */ size_t fasta_count( FILE *fp ); /* Get next sequence entry from a FASTA file given a file pointer. */ -bool fasta_get_entry( FILE *fp, seq_entry **entry ); +bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ); /* Output a sequence entry in FASTA format. */ void fasta_put_entry( seq_entry *entry ); @@ -26,4 +26,4 @@ void fasta_get_entries( FILE *fp, struct list **entries ); void fasta_put_entries( struct list *entries ); /* Deallocates memory from a seq_entry. */ -void fasta_free_entry( struct seq_entry *entry ); +void fasta_free_entry( seq_entry *entry ); diff --git a/code_c/Maasha/src/inc/filesys.h b/code_c/Maasha/src/inc/filesys.h index 6f9e872..0e83317 100644 --- a/code_c/Maasha/src/inc/filesys.h +++ b/code_c/Maasha/src/inc/filesys.h @@ -1,17 +1,20 @@ -//#define FILE_BUFFER_SIZE 64 * 1024 -#define FILE_BUFFER_SIZE 1024 * 1024 +#define FILE_BUFFER_SIZE 64 * 1024 +//#define FILE_BUFFER_SIZE 1 -struct file_buffer +struct _file_buffer { FILE *fp; /* file pointer */ char *str; /* the buffer string */ - size_t pos; /* index pointing to last position where some token was found */ - size_t use; /* index indicating how much of the buffer is scanned */ - size_t end; /* end position of buffer */ - size_t size; /* default buffer size */ - bool eof; /* flag indicating that buffer reached EOF */ + size_t pos; /* index pointing to last position where some token was found */ + size_t len; /* length of some found token */ + size_t use; /* index indicating how much of the buffer is scanned */ + size_t end; /* end position of buffer */ + long size; /* default buffer size */ + bool eof; /* flag indicating that buffer reached EOF */ }; +typedef struct _file_buffer file_buffer; + /* Read-open a file and return a file pointer. */ FILE *read_open( char *file ); @@ -39,26 +42,35 @@ void file_rename( char *old_name, char *new_name ); /* Opens a file for reading and loads a new buffer.*/ -struct file_buffer *read_open_buffer( char *file ); +file_buffer *buffer_read( char *file ); /* Get the next char from a file buffer, which is resized if necessary, until EOF.*/ -char buffer_getc( struct file_buffer *buffer ); +char buffer_getc( file_buffer *buffer ); + +/* Rewinds the file buffer one char, i.e. put one char back on the buffer. */ +void buffer_ungetc( file_buffer *buffer ); /* Get the next line that is terminated by \n or EOF from a file buffer. */ -char *buffer_gets( struct file_buffer *buffer ); +char *buffer_gets( file_buffer *buffer ); -/* Increases buffer size until it is larger than len. */ -void buffer_new_size( struct file_buffer *buffer, int len ); +/* Rewind the file buffer one line, i.e. put one line back on the buffer. */ +void buffer_ungets( file_buffer *buffer ); + +/* Doubles buffer size until it is larger than len. */ +void buffer_new_size( file_buffer *buffer, long len ); /* Resize file buffer discarding any old buffer before offset, */ /* and merge remaining old buffer with a new chunk of buffer. */ -void buffer_resize( struct file_buffer *buffer ); +void buffer_resize( file_buffer *buffer ); + +/* Moves file buffer of a given size num positions to the left. */ +void buffer_move( file_buffer *buffer, size_t size, size_t num ); /* Deallocates memory and close stream used by file buffer. */ -void buffer_destroy( struct file_buffer *buffer ); +void buffer_destroy( file_buffer **buffer ); /* Debug function that prints the content of a file_buffer. */ -void buffer_print( struct file_buffer *buffer ); +void buffer_print( file_buffer *buffer ); /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ diff --git a/code_c/Maasha/src/lib/common.c b/code_c/Maasha/src/lib/common.c index fb46e12..48b71e5 100644 --- a/code_c/Maasha/src/lib/common.c +++ b/code_c/Maasha/src/lib/common.c @@ -3,31 +3,6 @@ #include "mem.h" -/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ERROR HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ - - -void die( char *msg ) -{ - /* Martin A. Hansen, May 2008 */ - - /* Print error message and exits. */ - - fprintf( stderr, "ERROR: %s\n", msg ); - - exit( 1 ); -} - - -void warn( char *msg ) -{ - /* Martin A. Hansen, May 2008 */ - - /* Print warning message and exits. */ - - fprintf( stderr, "WARNING: %s\n", msg ); -} - - /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ diff --git a/code_c/Maasha/src/lib/fasta.c b/code_c/Maasha/src/lib/fasta.c index 87458a0..5ee46c6 100644 --- a/code_c/Maasha/src/lib/fasta.c +++ b/code_c/Maasha/src/lib/fasta.c @@ -1,5 +1,6 @@ #include "common.h" #include "mem.h" +#include "filesys.h" #include "fasta.h" #include "list.h" @@ -26,109 +27,235 @@ size_t fasta_count( FILE *fp ) } -bool fasta_get_entry( FILE *fp, seq_entry **entry ) +bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ) { - /* Martin A. Hansen, May 2008 */ + /* Martin A. Hansen, August 2008 */ - /* Get next sequence entry from a FASTA file given a file pointer. */ + /* Get next sequence entry from a FASTA file given a file buffer. */ - size_t i; - size_t j; - size_t offset; + char *line; + size_t seq_name_len; size_t seq_len; - char buffer[ FASTA_BUFFER ]; - size_t buffer_len; - char *seq_name = NULL; - char *seq = NULL; - - offset = ftell( fp ); - - /* ---- Skip ahead until header line and include header ---- */ + char *seq_name; + char *seq; + size_t i; - while ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) + while ( 1 ) { - buffer_len = strlen( buffer ); - - offset += buffer_len; - - if ( ( buffer[ 0 ] == '>' ) ) + if ( ( line = buffer_gets( buffer ) ) != NULL ) { - seq_name = mem_get( buffer_len - 1 ); - - for ( i = 1; i < buffer_len - 1; i++ ) { - seq_name[ i - 1 ] = buffer[ i ]; - } + if ( line[ 0 ] == '>' ) + { + seq_name_len = buffer->len - 2; + seq_name = mem_get( seq_name_len + 1 ); - seq_name[ i ] = '\0'; + memcpy( seq_name, &line[ 1 ], seq_name_len ); - break; + break; + } } } - /* ---- Determine length of sequence ---- */ + seq = mem_get( 1 ); seq_len = 0; - while ( ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) ) + while ( 1 ) { - for ( i = 0; buffer[ i ]; i++ ) + if ( ( line = buffer_gets( buffer ) ) != NULL ) { - if ( buffer[ i ] > 32 && buffer[ i ] < 127 ) { - seq_len++; + if ( line[ 0 ] == '>' ) + { + buffer_ungets( buffer ); + + break; } - } + else + { + mem_resize( seq, seq_len + strlen( line ) ); + + for ( i = 0; line[ i ]; i++ ) + { + if ( line[ i ] > 32 && line[ i ] < 127 ) + { + seq[ seq_len ] = line[ i ]; - if ( ( buffer[ 0 ] == '>' ) ) + seq_len++; + } + } + } + } + else { - seq_len -= strlen( buffer ) - 1; - break; } } - /* ---- Allocate memory for sequence ---- */ - - seq = mem_get( seq_len + 1 ); - - /* ---- Rewind file pointer and read sequence ---- */ - - if ( fseek( fp, offset, SEEK_SET ) != 0 ) + if ( seq_len == 0 ) { - fprintf( stderr, "ERROR: fseek SEEK_SET failed: %s\n", strerror( errno ) ); - abort(); + return FALSE; } +// seq = mem_resize( seq, seq_len + 1 ); - j = 0; - - while ( ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) ) - { - for ( i = 0; buffer[ i ]; i++ ) - { - if ( buffer[ i ] > 32 && buffer[ i ] < 127 ) - { - seq[ j ] = buffer[ i ]; - - if ( j == seq_len - 1 ) - { - seq[ j + 1 ] = '\0'; + seq[ seq_len + 1 ] = '\0'; - ( *entry )->seq_name = seq_name; - ( *entry )->seq = seq; - ( *entry )->seq_len = seq_len; +// should probably use memcpy below - return TRUE; - } - - j++; - } - } - } + ( *entry )->seq_name = seq_name; + ( *entry )->seq = seq; + ( *entry )->seq_len = seq_len; return FALSE; } -void fasta_put_entry( struct seq_entry *entry ) +//bool fasta_get_entry( FILE *fp, seq_entry **entry ) +//{ +// /* Martin A. Hansen, May 2008 */ +// +// /* Unit test done.*/ +// +// /* Get next sequence entry from a FASTA file given a file pointer. */ +// +// size_t i; +// size_t offset; +// size_t seq_buffer_len; +// size_t seq_len; +// size_t buffer_read; +// char buffer[ FASTA_BUFFER ]; +// size_t buffer_len; +// char *seq_name = NULL; +// char *seq = NULL; +// +// offset = ftell( fp ); +// +// /* ---- Skip ahead until header line and include header ---- */ +// +// while ( 1 ) +// { +// if ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) +// { +// buffer_len = strlen( buffer ); +// +// offset += buffer_len; +// +// if ( ( buffer[ 0 ] == '>' ) ) +// { +// seq_name = mem_get( buffer_len - 1 ); +// +// memcpy( seq_name, &buffer[ 1 ], buffer_len - 2 ); +// +// seq_name[ buffer_len - 2 ] = '\0'; +// +// break; +// } +// } +// else +// { +// if ( ferror( fp ) != 0 ) +// { +// fprintf( stderr, "ERROR: get_fasta_seq failed: %s\n", strerror( errno ) ); +// abort(); +// } +// else if ( feof( fp ) != 0 ) +// { +// return FALSE; +// } +// } +// } +// +// /* ---- Determine approximate length of sequence ---- */ +// +// seq_buffer_len = 0; +// +// while ( 1 ) +// { +// if ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) +// { +// if ( ( buffer[ 0 ] == '>' ) ) +// { +// assert( seq_buffer_len != 0 ); +// +// break; +// } +// else +// { +// seq_buffer_len += strlen( buffer ); +// } +// } +// else +// { +// if ( ferror( fp ) != 0 ) +// { +// fprintf( stderr, "ERROR: get_fasta_seq failed: %s\n", strerror( errno ) ); +// abort(); +// } +// else if ( feof( fp ) != 0 ) +// { +// break; +// } +// } +// } +// +// /* ---- Allocate approximate memory for sequence ---- */ +// +// seq = mem_get( seq_buffer_len + 1 ); +// +// /* ---- Rewind file pointer and read sequence ---- */ +// +// if ( fseek( fp, offset, SEEK_SET ) != 0 ) +// { +// fprintf( stderr, "ERROR: fseek SEEK_SET failed: %s\n", strerror( errno ) ); +// abort(); +// } +// +// buffer_read = 0; +// seq_len = 0; +// +// while ( buffer_read < seq_buffer_len ) +// { +// if ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) +// { +// for ( i = 0; buffer[ i ]; i++ ) +// { +// if ( buffer[ i ] > 32 && buffer[ i ] < 127 ) +// { +// seq[ seq_len ] = buffer[ i ]; +// +// seq_len++; +// } +// } +// +// buffer_read += i; +// } +// else +// { +// if ( ferror( fp ) != 0 ) +// { +// fprintf( stderr, "ERROR: get_fasta_seq failed: %s\n", strerror( errno ) ); +// abort(); +// } +// else if ( feof( fp ) != 0 ) +// { +// fprintf( stderr, "ERROR: get_fasta_seq failed: EOF\n" ); +// abort(); +// } +// } +// } +// +//// seq = mem_resize( seq, seq_len + 1 ); +// +// seq[ seq_len + 1 ] = '\0'; +// +// ( *entry )->seq_name = seq_name; +// ( *entry )->seq = seq; +// ( *entry )->seq_len = seq_len; +// +// return TRUE; +//} + + +void fasta_put_entry( seq_entry *entry ) { /* Martin A. Hansen, May 2008 */ @@ -175,7 +302,7 @@ void fasta_put_entries( struct list *entries ) } -void fasta_free_entry( struct seq_entry *entry ) +void fasta_free_entry( seq_entry *entry ) { /* Martin A. Hansen, June 2008 */ diff --git a/code_c/Maasha/src/lib/filesys.c b/code_c/Maasha/src/lib/filesys.c index 9a338e5..05d5f01 100644 --- a/code_c/Maasha/src/lib/filesys.c +++ b/code_c/Maasha/src/lib/filesys.c @@ -102,12 +102,6 @@ char *file_read( FILE *fp, size_t len ) fprintf( stderr, "ERROR: file_read failed\n" ); abort(); } - else if ( feof( fp ) ) - { - fprintf( stderr, "ERROR: file_read failed - end-of-file reached\n" ); - - abort(); - } string[ len ] = '\0'; @@ -151,18 +145,18 @@ void file_rename( char *old_name, char *new_name ) /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FILE BUFFER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ -struct file_buffer *read_open_buffer( char *file ) +file_buffer *buffer_read( char *file ) { /* Martin A. Hansen, June 2008 */ /* Opens a file for reading and loads a new buffer.*/ - struct file_buffer *buffer; - FILE *fp; - char *str; - bool eof; + file_buffer *buffer; + FILE *fp; + char *str; + bool eof; - buffer = mem_get( sizeof( buffer ) ); + buffer = mem_get( sizeof( file_buffer ) ); fp = read_open( file ); @@ -173,8 +167,9 @@ struct file_buffer *read_open_buffer( char *file ) buffer->fp = fp; buffer->str = str; buffer->pos = 0; + buffer->len = 0; buffer->use = 0; - buffer->end = strlen( str ); + buffer->end = strlen( str ) - 1; buffer->size = FILE_BUFFER_SIZE; buffer->eof = eof; @@ -182,7 +177,7 @@ struct file_buffer *read_open_buffer( char *file ) } -char buffer_getc( struct file_buffer *buffer ) +char buffer_getc( file_buffer *buffer ) { /* Martin A. Hansen, June 2008 */ @@ -190,11 +185,11 @@ char buffer_getc( struct file_buffer *buffer ) while ( 1 ) { - if ( buffer->use == buffer->end ) + if ( buffer->use == buffer->end + 1 ) { if ( buffer->eof ) { - return '\0'; + return EOF; } else { @@ -209,7 +204,20 @@ char buffer_getc( struct file_buffer *buffer ) } -char *buffer_gets( struct file_buffer *buffer ) +void buffer_ungetc( file_buffer *buffer ) +{ + /* Martin A. Hansen, August 2008. */ + + /* Rewinds the file buffer one char, */ + /* i.e. put one char back on the buffer. */ + + assert( buffer->use > 0 ); + + buffer->use--; +} + + +char *buffer_gets( file_buffer *buffer ) { /* Martin A. Hansen, June 2008 */ @@ -221,16 +229,17 @@ char *buffer_gets( struct file_buffer *buffer ) while ( 1 ) { - if ( ( pt = memchr( &buffer->str[ buffer->use ], '\n', buffer->end - buffer->use ) ) != NULL ) + if ( ( pt = memchr( &buffer->str[ buffer->use ], '\n', buffer->end + 1 - buffer->use ) ) != NULL ) { line_size = pt - &buffer->str[ buffer->use ] + 1; - line = mem_get( line_size ); + line = mem_get( line_size + 1 ); memcpy( line, &buffer->str[ buffer->use ], line_size ); line[ line_size ] = '\0'; + buffer->len = line_size; buffer->use += line_size; buffer_new_size( buffer, line_size ); @@ -239,9 +248,30 @@ char *buffer_gets( struct file_buffer *buffer ) } else { - if ( buffer->eof ) { - return NULL; - } else { + if ( buffer->eof ) + { + if ( buffer->use < buffer->end ) + { + line_size = buffer->end - buffer->use + 1; + + line = mem_get( line_size + 1 ); + + memcpy( line, &buffer->str[ buffer->use ], line_size ); + + line[ line_size ] = '\0'; + + buffer->len = line_size; + buffer->use += line_size; + + return line; + } + else + { + return NULL; + } + } + else + { buffer_resize( buffer ); } } @@ -249,24 +279,41 @@ char *buffer_gets( struct file_buffer *buffer ) } -void buffer_new_size( struct file_buffer *buffer, int len ) +void buffer_ungets( file_buffer *buffer ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Rewind the file buffer one line, */ + /* i.e. put one line back on the buffer. */ + + assert( buffer->use >= buffer->len ); + + buffer->use -= buffer->len; +} + + +void buffer_new_size( file_buffer *buffer, long len ) { /* Martin A. Hansen, June 2008 */ - /* Increases buffer size until it is larger than len. */ + /* Doubles buffer size until it is larger than len. */ - while ( buffer->size < len ) + while ( buffer->size <= len ) { buffer->size <<= 1; - if ( buffer->size <= 0 ) { - die( "buffer_new_size failed." ); + printf( "SIZE: %ld\n", buffer->size ); + + if ( buffer->size <= 0 ) + { + fprintf( stderr, "ERROR: buffer_new_size failed.\n" ); + abort(); } } } -void buffer_resize( struct file_buffer *buffer ) +void buffer_resize( file_buffer *buffer ) { /* Martin A. Hansen, June 2008 */ @@ -276,14 +323,23 @@ void buffer_resize( struct file_buffer *buffer ) size_t str_len; size_t new_end; + buffer_print( buffer ); str = file_read( buffer->fp, buffer->size ); + printf( "STR: %s\n", str ); + str_len = strlen( str ); + printf( "STR_LEN: %zu\n", str_len ); + feof( buffer->fp ) ? ( buffer->eof = TRUE ) : ( buffer->eof = FALSE ); + printf( "EOF: %i\n", buffer->eof ); + if ( buffer->pos != 0 ) { + assert( buffer->end >= buffer->pos ); + assert( ( buffer->use - buffer->pos ) != 0 ); memmove( buffer->str, &buffer->str[ buffer->pos ], buffer->use - buffer->pos ); buffer->end -= buffer->pos; @@ -293,44 +349,68 @@ void buffer_resize( struct file_buffer *buffer ) new_end = buffer->end + str_len; + printf( "END: %zu\n", buffer->end ); + printf( "NEW_END: %zu\n", new_end ); + buffer->str = mem_resize( buffer->str, new_end + 1 ); memcpy( &buffer->str[ buffer->end ], str, str_len ); - buffer->str[ new_end + 1 ] = '\0'; - + buffer->str[ str_len ] = '\0'; buffer->end = new_end; + buffer_print( buffer ); + die; mem_free( ( void * ) &str ); } -void buffer_destroy( struct file_buffer *buffer ) +void buffer_move( file_buffer *buffer, size_t size, size_t num ) +{ + /* Martin A. Hansen, August 2008 */ + + /* Moves file buffer of a given size num positions to the left. */ + + memmove( buffer->str, &buffer->str[ num ], size ); + + buffer->end -= num; + buffer->use = 0; + buffer->pos = 0; +} + + +void buffer_destroy( file_buffer **buffer ) { /* Martin A. Hansen, June 2008 */ /* Deallocates memory and close stream used by file buffer. */ - close_stream( buffer->fp ); + file_buffer *pt = *buffer; + + assert( pt != NULL ); + + close_stream( pt->fp ); - mem_free( ( void * ) &buffer->str ); - mem_free( ( void * ) &buffer ); + mem_free( ( void * ) &pt->str ); + mem_free( ( void * ) &pt ); } -void buffer_print( struct file_buffer *buffer ) +void buffer_print( file_buffer *buffer ) { /* Martin A. Hansen, June 2008 */ /* Debug function that prints the content of a file_buffer. */ - printf( "buffer: {\n" ); - printf( " pos : %lu\n", buffer->pos ); - printf( " use : %lu\n", buffer->use ); - printf( " end : %lu\n", buffer->end ); + printf( "\nbuffer: {\n" ); + printf( " pos : %zu\n", buffer->pos ); + printf( " len : %zu\n", buffer->len ); + printf( " use : %zu\n", buffer->use ); + printf( " end : %zu\n", buffer->end ); + printf( " size : %ld\n", buffer->size ); printf( " eof : %d\n", buffer->eof ); printf( " str : ->%s<-\n", buffer->str ); - printf( " str_len: %lu\n", strlen( buffer->str ) ); + printf( " str_len: %zu\n", strlen( buffer->str ) ); printf( "}\n" ); } diff --git a/code_c/Maasha/src/lib/seq.c b/code_c/Maasha/src/lib/seq.c index f11c6b9..db88277 100644 --- a/code_c/Maasha/src/lib/seq.c +++ b/code_c/Maasha/src/lib/seq.c @@ -76,7 +76,7 @@ void complement_nuc( char *seq ) } else if ( is_rna( seq ) ) { complement_rna( seq ); } else { - die( "Complement nuc failed.\n" ); + abort(); } } @@ -438,7 +438,7 @@ char *seq_guess_type( char *seq ) } else if ( is_protein( seq ) ) { type = "PROTEIN"; } else { - die( "Could not guess sequence type.\n" ); + abort(); } return type; @@ -476,7 +476,7 @@ int oligo2bin( char *oligo ) int bin; if ( strlen( oligo ) > 15 ) { - die( "Oligo will not fit in an integer." ); + abort(); } bin = 0; @@ -493,7 +493,7 @@ int oligo2bin( char *oligo ) case 'U': case 'u': bin |= 1; break; case 'C': case 'c': bin |= 2; break; case 'G': case 'g': bin |= 3; break; - default: die( "Unrecognized nucleotide." ); + default: abort(); } } diff --git a/code_c/Maasha/src/lib/ucsc.c b/code_c/Maasha/src/lib/ucsc.c index 2d831e0..b349c94 100644 --- a/code_c/Maasha/src/lib/ucsc.c +++ b/code_c/Maasha/src/lib/ucsc.c @@ -42,7 +42,7 @@ void bed_split( char *string, struct bed_entry12 *bed, int cols ) if ( ( new_line = memchr( string, '\n', 1024 ) ) != NULL ) { new_line_pos = new_line - string; } else { - die( "bed_split: no newline found." ); + abort(); } field_num = 0; @@ -62,7 +62,7 @@ void bed_split( char *string, struct bed_entry12 *bed, int cols ) } else { - die( "bed_split: no tab found." ); + abort(); } offset += pos + 1; @@ -107,7 +107,7 @@ void bed_split( char *string, struct bed_entry12 *bed, int cols ) } if ( pt == NULL ) { - die( "bed parse failed." ); + abort(); } offset = field_seps[ i ] + 1; diff --git a/code_c/Maasha/src/repeat-O-matic.c b/code_c/Maasha/src/repeat-O-matic.c index 3c6f340..b4fe442 100644 --- a/code_c/Maasha/src/repeat-O-matic.c +++ b/code_c/Maasha/src/repeat-O-matic.c @@ -39,7 +39,7 @@ int main( int argc, char *argv[] ) array = oligo_count( path ); - oligo_count_output( path, array ); + //oligo_count_output( path, array ); return 0; } @@ -51,18 +51,19 @@ uint *oligo_count( char *path ) /* Count the occurence of all oligos of a fixed size in a FASTA file. */ - uint *array = NULL; - uint i = 0; - uint mask = 0; - uint bin = 0; - uint bin_rc1 = 0; - uint bin_rc2 = 0; - uint j = 0; - uint A_rc = ( 3 << ( UINT_BITS - 2 ) ); /* 11 on the leftmost two bits an uint. */ - uint G_rc = ( 2 << ( UINT_BITS - 2 ) ); /* 10 on the leftmost two bits an uint. */ - uint C_rc = ( 1 << ( UINT_BITS - 2 ) ); /* 01 on the leftmost two bits an uint. */ - struct seq_entry *entry = NULL; - FILE *fp = NULL; + uint *array = NULL; + uint i = 0; + uint mask = 0; + uint bin = 0; + uint bin_rc1 = 0; + uint bin_rc2 = 0; + uint j = 0; + uint A_rc = ( 3 << ( UINT_BITS - 2 ) ); /* 11 on the leftmost two bits an uint. */ + uint G_rc = ( 2 << ( UINT_BITS - 2 ) ); /* 10 on the leftmost two bits an uint. */ + uint C_rc = ( 1 << ( UINT_BITS - 2 ) ); /* 01 on the leftmost two bits an uint. */ + seq_entry *entry = NULL; + FILE *fp = NULL; + file_buffer *buffer = NULL; array = mem_get_zero( sizeof( uint ) * SIZE ); @@ -72,7 +73,7 @@ uint *oligo_count( char *path ) fp = read_open( path ); - while ( ( fasta_get_entry( fp, entry ) ) ) + while ( ( fasta_get_entry( buffer, &entry ) ) ) { fprintf( stderr, "Counting oligos in: %s ... ", entry->seq_name ); @@ -147,105 +148,106 @@ uint mask_create( int oligo_size ) } -void oligo_count_output( char *path, uint *array ) -{ - /* Martin A. Hansen, June 2008 */ - - /* Output oligo count for each sequence position. */ - - struct seq_entry *entry; - FILE *fp; - uint mask; - uint i; - uint j; - uint bin; - int count; - uint *block; - uint block_pos; - uint block_beg; - uint block_size; - uint chr_pos; - - mask = mask_create( OLIGO_SIZE ); - - entry = mem_get( sizeof( entry ) ); - - fp = read_open( path ); - - while ( ( fasta_get_entry( fp, entry ) ) ) - { - fprintf( stderr, "Writing results for: %s ... ", entry->seq_name ); - - bin = 0; - j = 0; - block_pos = 0; - block_size = sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE ); - block = mem_get_zero( block_size ); - - for ( i = 0; entry->seq[ i ]; i++ ) - { - bin <<= 2; - - switch( entry->seq[ i ] ) - { - case 'A': case 'a': j++; break; - case 'T': case 't': bin |= T; j++; break; - case 'C': case 'c': bin |= C; j++; break; - case 'G': case 'g': bin |= G; j++; break; - default: bin = 0; j = 0; break; - } - - if ( j >= OLIGO_SIZE ) - { - count = array[ ( bin & mask ) ]; - - if ( count > 1 ) - { - chr_pos = i - OLIGO_SIZE + 1; - - if ( block_pos == 0 ) - { - memset( block, '\0', block_size ); - - block_beg = chr_pos; - - block[ block_pos ] = count; - - block_pos++; - } - else - { - if ( chr_pos > block_beg + block_pos ) - { - fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos ); - - block_pos = 0; - } - else - { - block[ block_pos ] = count; - - block_pos++; - } - } - } - } - } - - if ( block_pos > 0 ) - { - fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos ); - - mem_free( ( void * ) &block ); - } - - fprintf( stderr, "done.\n" ); - } - - close_stream( fp ); - - fasta_free_entry( entry ); -} +//void oligo_count_output( char *path, uint *array ) +//{ +// /* Martin A. Hansen, June 2008 */ +// +// /* Output oligo count for each sequence position. */ +// +// struct seq_entry *entry; +// FILE *fp; +// uint mask; +// uint i; +// uint j; +// uint bin; +// int count; +// uint *block; +// uint block_pos; +// uint block_beg; +// uint block_size; +// uint chr_pos; +// file_buffer *buffer; +// +// mask = mask_create( OLIGO_SIZE ); +// +// entry = mem_get( sizeof( entry ) ); +// +// fp = read_open( path ); +// +// while ( ( fasta_get_entry( buffer, &entry ) ) ) +// { +// fprintf( stderr, "Writing results for: %s ... ", entry->seq_name ); +// +// bin = 0; +// j = 0; +// block_pos = 0; +// block_size = sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE ); +// block = mem_get_zero( block_size ); +// +// for ( i = 0; entry->seq[ i ]; i++ ) +// { +// bin <<= 2; +// +// switch( entry->seq[ i ] ) +// { +// case 'A': case 'a': j++; break; +// case 'T': case 't': bin |= T; j++; break; +// case 'C': case 'c': bin |= C; j++; break; +// case 'G': case 'g': bin |= G; j++; break; +// default: bin = 0; j = 0; break; +// } +// +// if ( j >= OLIGO_SIZE ) +// { +// count = array[ ( bin & mask ) ]; +// +// if ( count > 1 ) +// { +// chr_pos = i - OLIGO_SIZE + 1; +// +// if ( block_pos == 0 ) +// { +// memset( block, '\0', block_size ); +// +// block_beg = chr_pos; +// +// block[ block_pos ] = count; +// +// block_pos++; +// } +// else +// { +// if ( chr_pos > block_beg + block_pos ) +// { +// fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos ); +// +// block_pos = 0; +// } +// else +// { +// block[ block_pos ] = count; +// +// block_pos++; +// } +// } +// } +// } +// } +// +// if ( block_pos > 0 ) +// { +// fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos ); +// +// mem_free( ( void * ) &block ); +// } +// +// fprintf( stderr, "done.\n" ); +// } +// +// close_stream( fp ); +// +// fasta_free_entry( entry ); +//} void fixedstep_put_entry( char *chr, int beg, int step_size, uint *block_array, int block_size ) diff --git a/code_c/Maasha/src/test/Makefile b/code_c/Maasha/src/test/Makefile index a74cfc3..d2f5758 100644 --- a/code_c/Maasha/src/test/Makefile +++ b/code_c/Maasha/src/test/Makefile @@ -9,7 +9,10 @@ LIB = -lm $(LIB_DIR)*.o all: test -test: test_fasta test_filesys test_mem test_strings +test: test_common test_fasta test_filesys test_mem test_strings + +test_common: test_common.c $(LIB_DIR)common.c + $(CC) $(Cflags) $(INC) $(LIB) test_common.c -o test_common test_fasta: test_fasta.c $(LIB_DIR)fasta.c $(CC) $(Cflags) $(INC) $(LIB) test_fasta.c -o test_fasta @@ -24,6 +27,7 @@ test_strings: test_strings.c $(LIB_DIR)strings.c $(CC) $(Cflags) $(INC) $(LIB) test_strings.c -o test_strings clean: + rm test_common rm test_fasta rm test_filesys rm test_mem diff --git a/code_c/Maasha/src/test/test_common.c b/code_c/Maasha/src/test/test_common.c new file mode 100644 index 0000000..29656b9 --- /dev/null +++ b/code_c/Maasha/src/test/test_common.c @@ -0,0 +1,49 @@ +#include "common.h" + +static void test_true(); +static void test_bool(); + +int main() +{ + fprintf( stderr, "Running all tests for common.c\n" ); + + test_true(); + test_bool(); + + fprintf( stderr, "Done\n\n" ); + + return EXIT_SUCCESS; +} + + +void test_true() +{ + fprintf( stderr, " Testing true ... " ); + + assert( TRUE == 1 ); + assert( FALSE == 0 ); + + fprintf( stderr, "OK\n" ); +} + + +void test_bool() +{ + fprintf( stderr, " Testing bool ... " ); + + bool answer; + + answer = TRUE; + + assert( answer == TRUE ); + assert( answer == 1 ); + + answer = FALSE; + + assert( answer == FALSE ); + assert( answer == 0 ); + + fprintf( stderr, "OK\n" ); +} + + diff --git a/code_c/Maasha/src/test/test_fasta.c b/code_c/Maasha/src/test/test_fasta.c index 33a73d5..c6ca620 100644 --- a/code_c/Maasha/src/test/test_fasta.c +++ b/code_c/Maasha/src/test/test_fasta.c @@ -3,10 +3,13 @@ #include "mem.h" #include "fasta.h" -#define TEST_FILE "test/test_files/test.fna" +#define TEST_FILE1 "test/test_files/test.fna" +#define TEST_FILE2 "/Users/m.hansen/DATA/genomes/hg18/hg18.fna" +#define TEST_COUNT 10 static void test_fasta_get_entry(); + int main() { fprintf( stderr, "Running all tests for fasta.c\n" ); @@ -23,22 +26,26 @@ void test_fasta_get_entry() { fprintf( stderr, " Testing fasta_get_entry ... " ); - FILE *fp; + file_buffer *buffer; seq_entry *entry; - fp = read_open( TEST_FILE ); + buffer = buffer_read( TEST_FILE1 ); - entry = mem_get( sizeof( entry ) ); + entry = mem_get( sizeof( seq_entry ) ); - if ( fasta_get_entry( fp, &entry ) != FALSE ) + while ( fasta_get_entry( buffer, &entry ) != FALSE ) { - assert( strlen( entry->seq_name ) == 5 ); - assert( strlen( entry->seq ) == 60 ); - assert( entry->seq_len == 60 ); - assert( strlen( entry->seq ) == entry->seq_len ); +// assert( strlen( entry->seq ) == entry->seq_len ); + +// printf( "%s\t%zu\n", entry->seq_name, entry->seq_len ); + +// free( entry->seq_name ); +// free( entry->seq ); } - close_stream( fp ); + buffer_destroy( &buffer ); + +// mem_free( ( void * ) buffer ); fprintf( stderr, "OK\n" ); } diff --git a/code_c/Maasha/src/test/test_filesys.c b/code_c/Maasha/src/test/test_filesys.c index 2dbc11a..52a70f8 100644 --- a/code_c/Maasha/src/test/test_filesys.c +++ b/code_c/Maasha/src/test/test_filesys.c @@ -1,6 +1,9 @@ #include "common.h" #include "filesys.h" +//#define TEST_FILE "/Users/m.hansen/DATA/genomes/hg18/hg18.fna" +#define TEST_FILE "test/test_files/test.fna" + static void test_read_open(); static void test_write_open(); static void test_append_open(); @@ -8,6 +11,16 @@ static void test_close_stream(); static void test_file_read(); static void test_file_unlink(); static void test_file_rename(); +static void test_buffer_read(); +static void test_buffer_getc(); +static void test_buffer_ungetc(); +static void test_buffer_gets(); +static void test_buffer_ungets(); +static void test_buffer_new_size(); +static void test_buffer_resize(); +static void test_buffer_move(); +static void test_buffer_destroy(); +static void test_buffer_print(); int main() @@ -22,6 +35,18 @@ int main() test_file_unlink(); test_file_rename(); + test_buffer_move(); + test_buffer_resize(); + + test_buffer_read(); + test_buffer_getc(); + test_buffer_ungetc(); + test_buffer_gets(); + test_buffer_ungets(); + test_buffer_new_size(); + test_buffer_destroy(); + test_buffer_print(); + fprintf( stderr, "Done\n\n" ); return EXIT_SUCCESS; @@ -30,10 +55,10 @@ int main() void test_read_open() { - FILE *fp; - fprintf( stderr, " Testing read_open ... " ); + FILE *fp; + // fp = read_open( "/tmp/asdf" ); // fp = read_open( "/private/etc/ssh_host_rsa_key" ); fp = read_open( "/dev/null" ); @@ -46,10 +71,10 @@ void test_read_open() void test_write_open() { - FILE *fp; - fprintf( stderr, " Testing write_open ... " ); + FILE *fp; + // fp = write_open( "/tmp/asdf" ); // fp = write_open( "/private/etc/ssh_host_rsa_key" ); fp = write_open( "/dev/null" ); @@ -62,10 +87,10 @@ void test_write_open() void test_append_open() { - FILE *fp; - fprintf( stderr, " Testing append_open ... " ); + FILE *fp; + //fp = append_open( "/tmp/asdf" ); //fp = append_open( "/private/etc/ssh_host_rsa_key" ); fp = append_open( "/dev/null" ); @@ -78,10 +103,10 @@ void test_append_open() void test_close_stream() { - FILE *fp; - fprintf( stderr, " Testing close_stream ... " ); + FILE *fp; + fp = read_open( "/dev/null" ); close_stream( fp ); @@ -92,13 +117,13 @@ void test_close_stream() void test_file_read() { + fprintf( stderr, " Testing file_read ... " ); + char *test_file = "/etc/passwd"; char *buffer; FILE *fp; size_t len = 1000; - fprintf( stderr, " Testing file_read ... " ); - fp = read_open( test_file ); buffer = file_read( fp, len ); @@ -114,11 +139,11 @@ void test_file_read() void test_file_unlink() { + fprintf( stderr, " Testing file_unlink ... " ); + char *test_file = "/tmp/test"; FILE *fp; - fprintf( stderr, " Testing file_unlink ... " ); - fp = write_open( test_file ); close_stream( fp ); @@ -135,12 +160,12 @@ void test_file_unlink() void test_file_rename() { + fprintf( stderr, " Testing file_rename ... " ); + char *file_before = "/tmp/before"; char *file_after = "/tmp/after"; FILE *fp; - - fprintf( stderr, " Testing file_rename ... " ); fp = write_open( file_before ); @@ -155,3 +180,370 @@ void test_file_rename() fprintf( stderr, "OK\n" ); } + + +void test_buffer_read() +{ + fprintf( stderr, " Testing buffer_read ... " ); + + char *file = "/tmp/test_buffer_read"; + char *str = "MARTIN"; + FILE *fp; + size_t i; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + assert( buffer->pos == 0 ); + assert( buffer->use == 0 ); + assert( buffer->end == 5 ); + assert( buffer->eof == TRUE ); + + for ( i = 0; str[ i ]; i++ ) { + assert( str[ i ] == buffer->str[ i ] ); + } + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_getc() +{ + fprintf( stderr, " Testing buffer_getc ... " ); + + char *file = "/tmp/test_buffer_getc"; + char *str = "MARTIN"; + FILE *fp; + size_t i; + char c; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + for ( i = 0; str[ i ]; i++ ) + { + c = buffer_getc( buffer ); + + assert( c != EOF ); + + assert( str[ i ] == c ); + } + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_ungetc() +{ + fprintf( stderr, " Testing buffer_ungetc ... " ); + + char *file = "/tmp/test_buffer_ungetc"; + char *str = "MARTIN"; + FILE *fp; + char c; + size_t i; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + c = buffer_getc( buffer ); + + assert( c == 'M' ); + + buffer_ungetc( buffer ); + + i = 0; + + while ( ( c = buffer_getc( buffer ) ) != EOF ) + { + assert( c == str[ i ] ); + + i++; + } + + assert( c == EOF ); + + buffer_ungetc( buffer ); + + c = buffer_getc( buffer ); + + assert( c == 'N' ); + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_gets() +{ + fprintf( stderr, " Testing buffer_gets ... " ); + + char *file = "/tmp/test_buffer_gets"; + char *out = "MARTIN\nASSER\nHANSEN\n"; + FILE *fp; + char *str; + int i; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, out ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + i = 0; + + while( ( str = buffer_gets( buffer ) ) != NULL ) + { + if ( i == 0 ) { + assert( strcmp( str, "MARTIN\n" ) == 0 ); + } else if ( i == 1 ) { + assert( strcmp( str, "ASSER\n" ) == 0 ); + } else if ( i == 2 ) { + assert( strcmp( str, "HANSEN\n" ) == 0 ); + } + + i++; + } + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_ungets() +{ + fprintf( stderr, " Testing buffer_ungets ... " ); + + char *file = "/tmp/test_buffer_ungets"; + char *out = "MARTIN\nASSER\nHANSEN\n"; + FILE *fp; + char *str1; + char *str2; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, out ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + str1 = buffer_gets( buffer ); + + buffer_ungets( buffer ); + + str2 = buffer_gets( buffer ); + + assert( strcmp( str1, str2 ) == 0 ); + + while ( ( str1 = buffer_gets( buffer ) ) != NULL ) + { + } + + buffer_ungets( buffer ); + + str1 = buffer_gets( buffer ); + + assert( ( strcmp( str1, "HANSEN\n" ) ) == 0 ); + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_new_size() +{ + fprintf( stderr, " Testing buffer_new_size ... " ); + + char *file = "/tmp/test_buffer_new_size"; + char *str = "X"; + FILE *fp; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + buffer_new_size( buffer, 201048577 ); + + assert( buffer->size == 268435456 ); + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_resize() +{ + fprintf( stderr, " Testing buffer_resize ... " ); + + char *file = "/tmp/test_buffer_new_size"; + char *str = "ABC"; + FILE *fp; + char c; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + while ( ( c = buffer_getc( buffer ) ) != EOF ) + { + printf( "C: %c\n", c ); + + + } + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_move() +{ + fprintf( stderr, " Testing buffer_resize ... " ); + + char *file = "/tmp/test_buffer_new_size"; + char *str = "ABCDEFG"; + FILE *fp; + file_buffer *buffer; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + buffer_print( buffer ); + + buffer_move( buffer, 7, 2 ); + + buffer_print( buffer ); + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_destroy() +{ + fprintf( stderr, " Testing buffer_destroy ... " ); + + char *file = "/tmp/test_buffer_destroy"; + char *str = "X"; + FILE *fp; + file_buffer *buffer = NULL; + + fp = write_open( file ); + + fprintf( fp, str ); + + close_stream( fp ); + + buffer = buffer_read( file ); + + buffer_destroy( &buffer ); + + assert( buffer->str == NULL ); + + buffer = NULL; + + assert( buffer == NULL ); + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + +void test_buffer_print() +{ + fprintf( stderr, " Testing buffer_print ... " ); + + file_buffer *buffer; + + buffer = buffer_read( TEST_FILE ); + +// buffer_print( buffer ); + + buffer_destroy( &buffer ); + + buffer = NULL; + + fprintf( stderr, "OK\n" ); +} + diff --git a/code_c/Maasha/src/test_all.pl b/code_c/Maasha/src/test_all.pl index ba9d92a..425e8e7 100755 --- a/code_c/Maasha/src/test_all.pl +++ b/code_c/Maasha/src/test_all.pl @@ -8,6 +8,7 @@ my ( $test_dir, @tests, $test ); $test_dir = "test"; @tests = qw( + test_common test_fasta test_filesys test_mem -- 2.39.5