From 681acf3b8eeee98264f6030cc3204915622433b6 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 21 Aug 2008 00:27:09 +0000 Subject: [PATCH] more updates of c and fixes to upload_to_ucsc git-svn-id: http://biopieces.googlecode.com/svn/trunk@212 74ccb610-7750-0410-82ae-013aeee3265d --- code_c/Maasha/src/gmon.out | Bin 736585 -> 97 bytes code_c/Maasha/src/inc/fasta.h | 4 +- code_c/Maasha/src/inc/filesys.h | 62 ++-- code_c/Maasha/src/lib/fasta.c | 43 +-- code_c/Maasha/src/lib/filesys.c | 241 ++++++++------- code_c/Maasha/src/repeat-O-matic.c | 2 +- code_c/Maasha/src/test/Makefile | 2 +- code_c/Maasha/src/test/test_fasta.c | 85 ++++-- code_c/Maasha/src/test/test_filesys.c | 274 +++++++++++------- code_c/Maasha/src/{test_all.pl => testall.pl} | 0 code_perl/Maasha/Biopieces.pm | 8 +- code_perl/Maasha/Calc.pm | 15 + code_perl/Maasha/UCSC.pm | 16 +- 13 files changed, 466 insertions(+), 286 deletions(-) rename code_c/Maasha/src/{test_all.pl => testall.pl} (100%) diff --git a/code_c/Maasha/src/gmon.out b/code_c/Maasha/src/gmon.out index 5176fb2725a133e7561246dd60826927015b4842..4c6cbd904a128d373eb5d6be0553ab95b4623bdf 100644 GIT binary patch delta 11 ScmX^4ODAzcEK5nUg*gBq*aV0G literal 736585 zcmeI)J!?}@9KhjI+i0z=@71VPt6iln;uo+ws2~Um)_1EZC^Qyq9Rhv;2Zs!ff?eE$ znp%`n}wjQo0(~wz!Jd#EXUJ z6LU`=71#B~!s2Xwd}_Sbi28SUVPmYARi9aWa$&w+pMN%fzCE)i=B|#V)I6S2b+){3 zR4b{rv4%GS1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1h$GmZ){{T zP7f`eNa^Fvl!~)I#q~MvzqPwKT=YHeE7Rpz@yu%U*IQ-pj9!cWaHs4&(Z8bKJt(^x zeJ!4Sf2r)^(9)Z%E72=iZ;!6V+MW43qNlU&k3MrdrP}3ky@BYttanAXvL1|H&w42O zT;{O;)L-IS)TlnlJ&vpi}$2;jsD(sKMwV3tM=+&%`McS0t5&UAn-p4bpB8H3yoTA6951J diff --git a/code_c/Maasha/src/inc/fasta.h b/code_c/Maasha/src/inc/fasta.h index fac3023..a0f5619 100644 --- a/code_c/Maasha/src/inc/fasta.h +++ b/code_c/Maasha/src/inc/fasta.h @@ -1,5 +1,7 @@ #define FASTA_BUFFER 256 * 1024 +#define isseq( x ) ( x > 32 && x < 127 ) ? TRUE : FALSE + /* Structure of a sequence entry. */ struct _seq_entry { @@ -14,7 +16,7 @@ typedef struct _seq_entry seq_entry; size_t fasta_count( FILE *fp ); /* Get next sequence entry from a FASTA file given a file pointer. */ -bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ); +bool fasta_get_entry( file_buffer **buffer_ppt, seq_entry **entry ); /* Output a sequence entry in FASTA format. */ void fasta_put_entry( seq_entry *entry ); diff --git a/code_c/Maasha/src/inc/filesys.h b/code_c/Maasha/src/inc/filesys.h index 0e83317..733308a 100644 --- a/code_c/Maasha/src/inc/filesys.h +++ b/code_c/Maasha/src/inc/filesys.h @@ -1,76 +1,84 @@ -#define FILE_BUFFER_SIZE 64 * 1024 -//#define FILE_BUFFER_SIZE 1 +//#define FILE_BUFFER_SIZE 64 * 1024 +#define FILE_BUFFER_SIZE 1 struct _file_buffer { - FILE *fp; /* file pointer */ - char *str; /* the buffer string */ - size_t pos; /* index pointing to last position where some token was found */ - size_t len; /* length of some found token */ - size_t use; /* index indicating how much of the buffer is scanned */ - size_t end; /* end position of buffer */ - long size; /* default buffer size */ - bool eof; /* flag indicating that buffer reached EOF */ + FILE *fp; /* file pointer */ + size_t token_pos; /* index pointing to last position where some token was found */ + size_t token_len; /* length of some found token */ + size_t buffer_pos; /* index indicating how much of the buffer is scanned */ + size_t buffer_end; /* end position of buffer */ + long buffer_size; /* default buffer size */ + char *str; /* the buffer string */ + bool eof; /* flag indicating that buffer reached EOF */ }; typedef struct _file_buffer file_buffer; /* Read-open a file and return a file pointer. */ -FILE *read_open( char *file ); +FILE *read_open( char *file ); /* Write-open a file and return a file pointer. */ -FILE *write_open( char *file ); +FILE *write_open( char *file ); /* Append-open a file and return a file pointer. */ -FILE *append_open( char *file ); +FILE *append_open( char *file ); /* Close a stream defined by a file pointer. */ -void close_stream( FILE *fp ); +void close_stream( FILE *fp ); /* Read in len number of bytes from the current position of a */ /* file pointer into a string that is allocated and null terminated. */ -char *file_read( FILE *fp, size_t len ); +/* The number of read chars is returned. */ +size_t file_read( FILE *fp, char **string_ppt, size_t len ); /* Delete a file. */ -void file_unlink( char *file ); +void file_unlink( char *file ); /* Rename a file. */ -void file_rename( char *old_name, char *new_name ); +void file_rename( char *old_name, char *new_name ); /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FILE BUFFER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ /* Opens a file for reading and loads a new buffer.*/ -file_buffer *buffer_read( char *file ); +/* The number of read chars is returned. */ +size_t buffer_new( char *file, file_buffer **buffer_ppt, size_t size ); + +/* Read in buffer->size bytes from file and appends to buffer string. */ +/* The number of read chars is returned. */ +size_t buffer_read( file_buffer **buffer_ppt ); /* Get the next char from a file buffer, which is resized if necessary, until EOF.*/ -char buffer_getc( file_buffer *buffer ); +char buffer_getc( file_buffer *buffer ); /* Rewinds the file buffer one char, i.e. put one char back on the buffer. */ -void buffer_ungetc( file_buffer *buffer ); +void buffer_ungetc( file_buffer *buffer ); /* Get the next line that is terminated by \n or EOF from a file buffer. */ -char *buffer_gets( file_buffer *buffer ); +/* The number of read chars is returned. */ +char *buffer_gets( file_buffer *buffer ); /* Rewind the file buffer one line, i.e. put one line back on the buffer. */ -void buffer_ungets( file_buffer *buffer ); +void buffer_ungets( file_buffer *buffer ); /* Doubles buffer size until it is larger than len. */ -void buffer_new_size( file_buffer *buffer, long len ); +void buffer_new_size( file_buffer *buffer, long len ); /* Resize file buffer discarding any old buffer before offset, */ /* and merge remaining old buffer with a new chunk of buffer. */ -void buffer_resize( file_buffer *buffer ); +bool buffer_resize( file_buffer *buffer ); /* Moves file buffer of a given size num positions to the left. */ -void buffer_move( file_buffer *buffer, size_t size, size_t num ); +/* The size of the resulting string is returned. */ +size_t buffer_move( file_buffer *buffer, size_t size, size_t num ); /* Deallocates memory and close stream used by file buffer. */ -void buffer_destroy( file_buffer **buffer ); +void buffer_destroy( file_buffer **buffer_ppt ); /* Debug function that prints the content of a file_buffer. */ -void buffer_print( file_buffer *buffer ); +void buffer_print( file_buffer *buffer ); /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ diff --git a/code_c/Maasha/src/lib/fasta.c b/code_c/Maasha/src/lib/fasta.c index 5ee46c6..3e5880c 100644 --- a/code_c/Maasha/src/lib/fasta.c +++ b/code_c/Maasha/src/lib/fasta.c @@ -27,18 +27,19 @@ size_t fasta_count( FILE *fp ) } -bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ) +bool fasta_get_entry( file_buffer **buffer_ppt, seq_entry **entry_ppt ) { /* Martin A. Hansen, August 2008 */ /* Get next sequence entry from a FASTA file given a file buffer. */ - char *line; - size_t seq_name_len; - size_t seq_len; - char *seq_name; - char *seq; - size_t i; + file_buffer *buffer = *buffer_ppt; + char *line = NULL; + size_t seq_name_len = 0; + size_t seq_len = 0; + char *seq_name = NULL; + char *seq = NULL; + size_t i = 0; while ( 1 ) { @@ -46,7 +47,7 @@ bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ) { if ( line[ 0 ] == '>' ) { - seq_name_len = buffer->len - 2; + seq_name_len = buffer->token_len - 2; seq_name = mem_get( seq_name_len + 1 ); memcpy( seq_name, &line[ 1 ], seq_name_len ); @@ -54,8 +55,14 @@ bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ) break; } } + else + { + return FALSE; + } } +// printf( "SEQ_NAME: ->%s<-\n", seq_name ); + seq = mem_get( 1 ); seq_len = 0; @@ -72,11 +79,11 @@ bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ) } else { - mem_resize( seq, seq_len + strlen( line ) ); +// mem_resize( seq, seq_len + buffer->token_len ); for ( i = 0; line[ i ]; i++ ) { - if ( line[ i ] > 32 && line[ i ] < 127 ) + if ( isseq( line[ i ] ) ) { seq[ seq_len ] = line[ i ]; @@ -95,17 +102,19 @@ bool fasta_get_entry( file_buffer *buffer, seq_entry **entry ) { return FALSE; } -// seq = mem_resize( seq, seq_len + 1 ); +// seq = mem_resize( seq, seq_len ); + + seq[ seq_len ] = '\0'; - seq[ seq_len + 1 ] = '\0'; +// printf( "SEQ: ->%s<-\n", seq ); -// should probably use memcpy below + ( *entry_ppt )->seq_name = seq_name; + ( *entry_ppt )->seq = seq; + ( *entry_ppt )->seq_len = seq_len; - ( *entry )->seq_name = seq_name; - ( *entry )->seq = seq; - ( *entry )->seq_len = seq_len; + *buffer_ppt = buffer; - return FALSE; + return TRUE; } diff --git a/code_c/Maasha/src/lib/filesys.c b/code_c/Maasha/src/lib/filesys.c index 05d5f01..3c80226 100644 --- a/code_c/Maasha/src/lib/filesys.c +++ b/code_c/Maasha/src/lib/filesys.c @@ -82,20 +82,22 @@ void close_stream( FILE *fp ) } -char *file_read( FILE *fp, size_t len ) +size_t file_read( FILE *fp, char **string_ppt, size_t len ) { /* Martin A. Hansen, June 2008 */ /* Read in len number of bytes from the current position of a */ /* file pointer into a string that is allocated and null terminated. */ + /* The number of read chars is returned. */ - char *string; + char *string = *string_ppt; + size_t num = 0; assert( len > 0 ); string = mem_get( len + 1 ); - fread( string, len, 1, fp ); + num = fread( string, 1, len, fp ); if ( ferror( fp ) != 0 ) { @@ -103,9 +105,11 @@ char *file_read( FILE *fp, size_t len ) abort(); } - string[ len ] = '\0'; + string[ num ] = '\0'; - return string; + *string_ppt = string; + + return num; } @@ -145,35 +149,72 @@ void file_rename( char *old_name, char *new_name ) /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FILE BUFFER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ -file_buffer *buffer_read( char *file ) +size_t buffer_new( char *file, file_buffer **buffer_ppt, size_t size ) { /* Martin A. Hansen, June 2008 */ /* Opens a file for reading and loads a new buffer.*/ + /* The number of read chars is returned. */ - file_buffer *buffer; - FILE *fp; - char *str; - bool eof; + file_buffer *buffer = *buffer_ppt; + FILE *fp = NULL; + size_t num = 0; buffer = mem_get( sizeof( file_buffer ) ); - fp = read_open( file ); + fp = read_open( file ); - str = file_read( fp, FILE_BUFFER_SIZE ); + buffer->fp = fp; + buffer->str = NULL; + buffer->token_pos = 0; + buffer->token_len = 0; + buffer->buffer_pos = 0; + buffer->buffer_end = 0; + buffer->buffer_size = size; + buffer->eof = FALSE; - feof( fp ) ? ( eof = TRUE ) : ( eof = FALSE ); + num = buffer_read( &buffer ); - buffer->fp = fp; - buffer->str = str; - buffer->pos = 0; - buffer->len = 0; - buffer->use = 0; - buffer->end = strlen( str ) - 1; - buffer->size = FILE_BUFFER_SIZE; - buffer->eof = eof; + *buffer_ppt = buffer; - return buffer; + return num; +} + + +size_t buffer_read( file_buffer **buffer_ppt ) +{ + /* Martin A. Hansen, June 2008 */ + + /* Read in buffer->buffer_size bytes from file and appends to buffer string. */ + + file_buffer *buffer = *buffer_ppt; + char *str = NULL; + size_t str_len = 0; + size_t new_end = 0; + size_t num = 0; + + num = file_read( buffer->fp, &str, buffer->buffer_size ); + + if ( num != 0 ) + { + str_len = num; + new_end = buffer->buffer_end + str_len; + + buffer->str = mem_resize( buffer->str, new_end ); + + memcpy( &buffer->str[ buffer->buffer_end ], str, str_len ); + + buffer->str[ new_end ] = '\0'; + buffer->buffer_end = new_end; + + mem_free( ( void * ) &str ); + } + + buffer->eof = feof( buffer->fp ) ? TRUE : FALSE; + + *buffer_ppt = buffer; + + return num; } @@ -185,7 +226,7 @@ char buffer_getc( file_buffer *buffer ) while ( 1 ) { - if ( buffer->use == buffer->end + 1 ) + if ( buffer->buffer_pos == buffer->buffer_end ) { if ( buffer->eof ) { @@ -193,13 +234,16 @@ char buffer_getc( file_buffer *buffer ) } else { - buffer->pos = buffer->use; - buffer_new_size( buffer, buffer->use ); - buffer_resize( buffer ); + buffer->token_pos = buffer->buffer_pos; + buffer_new_size( buffer, buffer->buffer_pos ); // MOVE THIS TO buffer_resize !!!!! ?????? + + if ( ( buffer_resize( buffer ) == FALSE ) ) { + return EOF; + } } } - return buffer->str[ buffer->use++ ]; + return buffer->str[ buffer->buffer_pos++ ]; } } @@ -211,9 +255,9 @@ void buffer_ungetc( file_buffer *buffer ) /* Rewinds the file buffer one char, */ /* i.e. put one char back on the buffer. */ - assert( buffer->use > 0 ); + assert( buffer->buffer_pos > 0 ); - buffer->use--; + buffer->buffer_pos--; } @@ -223,24 +267,24 @@ char *buffer_gets( file_buffer *buffer ) /* Get the next line that is terminated by \n or EOF from a file buffer. */ - char *pt; - char *line; - size_t line_size; + char *pt = NULL; + char *line = NULL; + size_t line_size = 0; while ( 1 ) { - if ( ( pt = memchr( &buffer->str[ buffer->use ], '\n', buffer->end + 1 - buffer->use ) ) != NULL ) + if ( ( pt = memchr( &buffer->str[ buffer->buffer_pos ], '\n', buffer->buffer_end + 1 - buffer->buffer_pos ) ) != NULL ) { - line_size = pt - &buffer->str[ buffer->use ] + 1; + line_size = pt - &buffer->str[ buffer->buffer_pos ] + 1; line = mem_get( line_size + 1 ); - memcpy( line, &buffer->str[ buffer->use ], line_size ); + memcpy( line, &buffer->str[ buffer->buffer_pos ], line_size ); line[ line_size ] = '\0'; - buffer->len = line_size; - buffer->use += line_size; + buffer->token_len = line_size; + buffer->buffer_pos += line_size; buffer_new_size( buffer, line_size ); @@ -250,18 +294,18 @@ char *buffer_gets( file_buffer *buffer ) { if ( buffer->eof ) { - if ( buffer->use < buffer->end ) + if ( buffer->buffer_pos < buffer->buffer_end ) { - line_size = buffer->end - buffer->use + 1; + line_size = buffer->buffer_end - buffer->buffer_pos + 1; line = mem_get( line_size + 1 ); - memcpy( line, &buffer->str[ buffer->use ], line_size ); + memcpy( line, &buffer->str[ buffer->buffer_pos ], line_size ); line[ line_size ] = '\0'; - buffer->len = line_size; - buffer->use += line_size; + buffer->token_len = line_size; + buffer->buffer_pos += line_size; return line; } @@ -286,9 +330,9 @@ void buffer_ungets( file_buffer *buffer ) /* Rewind the file buffer one line, */ /* i.e. put one line back on the buffer. */ - assert( buffer->use >= buffer->len ); + assert( buffer->buffer_pos >= buffer->token_len ); - buffer->use -= buffer->len; + buffer->buffer_pos -= buffer->token_len; } @@ -298,13 +342,11 @@ void buffer_new_size( file_buffer *buffer, long len ) /* Doubles buffer size until it is larger than len. */ - while ( buffer->size <= len ) + while ( buffer->buffer_size <= len ) { - buffer->size <<= 1; + buffer->buffer_size <<= 1; - printf( "SIZE: %ld\n", buffer->size ); - - if ( buffer->size <= 0 ) + if ( buffer->buffer_size <= 0 ) { fprintf( stderr, "ERROR: buffer_new_size failed.\n" ); abort(); @@ -313,86 +355,67 @@ void buffer_new_size( file_buffer *buffer, long len ) } -void buffer_resize( file_buffer *buffer ) +bool buffer_resize( file_buffer *buffer ) { /* Martin A. Hansen, June 2008 */ /* Resize file buffer. */ - char *str; - size_t str_len; - size_t new_end; - - buffer_print( buffer ); - str = file_read( buffer->fp, buffer->size ); - - printf( "STR: %s\n", str ); - - str_len = strlen( str ); - - printf( "STR_LEN: %zu\n", str_len ); + size_t num = 0; - feof( buffer->fp ) ? ( buffer->eof = TRUE ) : ( buffer->eof = FALSE ); - - printf( "EOF: %i\n", buffer->eof ); - - if ( buffer->pos != 0 ) - { - assert( buffer->end >= buffer->pos ); - assert( ( buffer->use - buffer->pos ) != 0 ); - memmove( buffer->str, &buffer->str[ buffer->pos ], buffer->use - buffer->pos ); - - buffer->end -= buffer->pos; - buffer->use = 0; - buffer->pos = 0; + if ( buffer->token_pos != 0 ) { + buffer_move( buffer, buffer->buffer_pos, buffer->token_pos ); } - new_end = buffer->end + str_len; - - printf( "END: %zu\n", buffer->end ); - printf( "NEW_END: %zu\n", new_end ); + num = buffer_read( &buffer ); - buffer->str = mem_resize( buffer->str, new_end + 1 ); - - memcpy( &buffer->str[ buffer->end ], str, str_len ); - - buffer->str[ str_len ] = '\0'; - buffer->end = new_end; - - buffer_print( buffer ); - die; - mem_free( ( void * ) &str ); + if ( num == 0 ) { + return FALSE; + } else { + return TRUE; + } } -void buffer_move( file_buffer *buffer, size_t size, size_t num ) +size_t buffer_move( file_buffer *buffer, size_t size, size_t num ) { /* Martin A. Hansen, August 2008 */ /* Moves file buffer of a given size num positions to the left. */ + /* The size of the resulting string is returned. */ + + size_t len = 0; + + assert( size > 0 ); + assert( num > 0 ); + assert( num <= size ); memmove( buffer->str, &buffer->str[ num ], size ); - buffer->end -= num; - buffer->use = 0; - buffer->pos = 0; + len = size - num; + + buffer->buffer_end = len; + buffer->buffer_pos = 0; + buffer->token_pos = 0; + + return len; } -void buffer_destroy( file_buffer **buffer ) +void buffer_destroy( file_buffer **buffer_ppt ) { /* Martin A. Hansen, June 2008 */ /* Deallocates memory and close stream used by file buffer. */ - file_buffer *pt = *buffer; + file_buffer *buffer = *buffer_ppt; - assert( pt != NULL ); + assert( buffer != NULL ); - close_stream( pt->fp ); + close_stream( buffer->fp ); - mem_free( ( void * ) &pt->str ); - mem_free( ( void * ) &pt ); + mem_free( ( void * ) &buffer->str ); + mem_free( ( void * ) &buffer ); } @@ -403,14 +426,18 @@ void buffer_print( file_buffer *buffer ) /* Debug function that prints the content of a file_buffer. */ printf( "\nbuffer: {\n" ); - printf( " pos : %zu\n", buffer->pos ); - printf( " len : %zu\n", buffer->len ); - printf( " use : %zu\n", buffer->use ); - printf( " end : %zu\n", buffer->end ); - printf( " size : %ld\n", buffer->size ); - printf( " eof : %d\n", buffer->eof ); - printf( " str : ->%s<-\n", buffer->str ); - printf( " str_len: %zu\n", strlen( buffer->str ) ); + printf( " token_pos : %zu\n", buffer->token_pos ); + printf( " token_len : %zu\n", buffer->token_len ); + printf( " buffer_pos : %zu\n", buffer->buffer_pos ); + printf( " buffer_end : %zu\n", buffer->buffer_end ); + printf( " buffer_size : %ld\n", buffer->buffer_size ); + printf( " str : ->%s<-\n", buffer->str ); + printf( " eof : %d\n", buffer->eof ); + + if ( buffer->str != NULL ) { + printf( " _str_len_ : %zu\n", strlen( buffer->str ) ); + } + printf( "}\n" ); } diff --git a/code_c/Maasha/src/repeat-O-matic.c b/code_c/Maasha/src/repeat-O-matic.c index b4fe442..6b640d6 100644 --- a/code_c/Maasha/src/repeat-O-matic.c +++ b/code_c/Maasha/src/repeat-O-matic.c @@ -73,7 +73,7 @@ uint *oligo_count( char *path ) fp = read_open( path ); - while ( ( fasta_get_entry( buffer, &entry ) ) ) + while ( ( fasta_get_entry( &buffer, &entry ) ) ) { fprintf( stderr, "Counting oligos in: %s ... ", entry->seq_name ); diff --git a/code_c/Maasha/src/test/Makefile b/code_c/Maasha/src/test/Makefile index d2f5758..43ddd58 100644 --- a/code_c/Maasha/src/test/Makefile +++ b/code_c/Maasha/src/test/Makefile @@ -1,5 +1,5 @@ CC = gcc -Cflags = -Wall -Werror # for gprof +Cflags = -Wall -Werror -g -pg # for gprof INC_DIR = ../inc/ LIB_DIR = ../lib/ diff --git a/code_c/Maasha/src/test/test_fasta.c b/code_c/Maasha/src/test/test_fasta.c index c6ca620..cebb6d7 100644 --- a/code_c/Maasha/src/test/test_fasta.c +++ b/code_c/Maasha/src/test/test_fasta.c @@ -6,15 +6,20 @@ #define TEST_FILE1 "test/test_files/test.fna" #define TEST_FILE2 "/Users/m.hansen/DATA/genomes/hg18/hg18.fna" #define TEST_COUNT 10 +#define TEST_SIZE 10 -static void test_fasta_get_entry(); +//static void test_fasta_get_entry(); +static void test_fasta_get_seq_name(); +//static void test_fasta_put_entry(); int main() { fprintf( stderr, "Running all tests for fasta.c\n" ); - test_fasta_get_entry(); +// test_fasta_get_entry(); + test_fasta_get_seq_name(); +// test_fasta_put_entry(); fprintf( stderr, "Done\n\n" ); @@ -22,32 +27,64 @@ int main() } -void test_fasta_get_entry() +//void test_fasta_get_entry() +//{ +// fprintf( stderr, " Testing fasta_get_entry ... " ); +// +// file_buffer *buffer = NULL; +// seq_entry *entry = NULL; +// +// buffer_new( TEST_FILE1, &buffer, TEST_SIZE ); +// +// entry = mem_get( sizeof( seq_entry ) ); +// +// +// while ( fasta_get_entry( &buffer, &entry ) != FALSE ) +// { +// fasta_put_entry( entry ); +//// assert( strlen( entry->seq ) == entry->seq_len ); +// +//// printf( "%s\t%zu\n", entry->seq_name, entry->seq_len ); +// +// } +// +// buffer_destroy( &buffer ); +// +// buffer = NULL; +// +// fprintf( stderr, "OK\n" ); +//} +// + + +void test_fasta_get_seq_name() { - fprintf( stderr, " Testing fasta_get_entry ... " ); + fprintf( stderr, " Testing fasta_get_seq_name ... " ); - file_buffer *buffer; - seq_entry *entry; - - buffer = buffer_read( TEST_FILE1 ); - - entry = mem_get( sizeof( seq_entry ) ); - - while ( fasta_get_entry( buffer, &entry ) != FALSE ) - { -// assert( strlen( entry->seq ) == entry->seq_len ); - -// printf( "%s\t%zu\n", entry->seq_name, entry->seq_len ); - -// free( entry->seq_name ); -// free( entry->seq ); - } - - buffer_destroy( &buffer ); - -// mem_free( ( void * ) buffer ); + assert( strcmp( seq_name, "test0" ) == 0 ); fprintf( stderr, "OK\n" ); } +//void test_fasta_put_entry() +//{ +// fprintf( stderr, " Testing fasta_put_entry ... " ); +// +// seq_entry *entry = NULL; +// char *seq_name = "test"; +// char *seq = "ATCG"; +// size_t seq_len = strlen( seq ); +// +// entry = mem_get( sizeof( seq_entry ) ); +// +// entry->seq_name = seq_name; +// entry->seq = seq; +// entry->seq_len = seq_len; +// +// fasta_put_entry( entry ); +// +// fprintf( stderr, "OK\n" ); +//} + + diff --git a/code_c/Maasha/src/test/test_filesys.c b/code_c/Maasha/src/test/test_filesys.c index 52a70f8..f3c3878 100644 --- a/code_c/Maasha/src/test/test_filesys.c +++ b/code_c/Maasha/src/test/test_filesys.c @@ -3,6 +3,7 @@ //#define TEST_FILE "/Users/m.hansen/DATA/genomes/hg18/hg18.fna" #define TEST_FILE "test/test_files/test.fna" +#define TEST_SIZE 1 static void test_read_open(); static void test_write_open(); @@ -11,6 +12,7 @@ static void test_close_stream(); static void test_file_read(); static void test_file_unlink(); static void test_file_rename(); +static void test_buffer_new(); static void test_buffer_read(); static void test_buffer_getc(); static void test_buffer_ungetc(); @@ -34,16 +36,15 @@ int main() test_file_read(); test_file_unlink(); test_file_rename(); - - test_buffer_move(); - test_buffer_resize(); - + test_buffer_new(); test_buffer_read(); test_buffer_getc(); test_buffer_ungetc(); test_buffer_gets(); test_buffer_ungets(); test_buffer_new_size(); + test_buffer_resize(); + test_buffer_move(); test_buffer_destroy(); test_buffer_print(); @@ -119,19 +120,26 @@ void test_file_read() { fprintf( stderr, " Testing file_read ... " ); - char *test_file = "/etc/passwd"; - char *buffer; - FILE *fp; - size_t len = 1000; + char *file = "/tmp/test_file_read"; + char *str1 = "MARTIN"; + char *str2 = NULL; + FILE *fp1 = NULL; + FILE *fp2 = NULL; + size_t len = strlen( str1 ); + size_t num = 0; - fp = read_open( test_file ); + fp1 = write_open( file ); + fprintf( fp1, str1 ); + close_stream( fp1 ); - buffer = file_read( fp, len ); + fp2 = read_open( file ); - close_stream( fp ); + num = file_read( fp2, &str2, len ); - assert( strlen( buffer ) == len ); - assert( buffer[ len ] == '\0' ); + close_stream( fp2 ); + + assert( num == len ); + assert( strcmp( str1, str2 ) == 0 ); fprintf( stderr, "OK\n" ); } @@ -182,39 +190,107 @@ void test_file_rename() } +void test_buffer_new() +{ + fprintf( stderr, " Testing buffer_new ... " ); + + char *file = "/tmp/test_buffer_new"; + char *str = "MARTIN"; + FILE *fp = NULL; + size_t size = 10; + size_t num = 0; + file_buffer *buffer = NULL; + + fp = write_open( file ); + fprintf( fp, str ); + close_stream( fp ); + + num = buffer_new( file, &buffer, size ); + + assert( num == strlen( str ) ); + assert( buffer->token_pos == 0 ); + assert( buffer->buffer_pos == 0 ); + assert( buffer->buffer_end == 6 ); + assert( buffer->buffer_size == size ); + assert( buffer->eof == TRUE ); + assert( strcmp( str, buffer->str ) == 0 ); + + buffer_destroy( &buffer ); + + buffer = NULL; + + file_unlink( file ); + + fprintf( stderr, "OK\n" ); +} + + void test_buffer_read() { fprintf( stderr, " Testing buffer_read ... " ); - char *file = "/tmp/test_buffer_read"; - char *str = "MARTIN"; - FILE *fp; - size_t i; - file_buffer *buffer; + char *file = "/tmp/test_buffer_read"; + char *str = "MARTIN"; + FILE *fp = NULL; + size_t size = 2; + size_t num = 0; + file_buffer *buffer = NULL; fp = write_open( file ); - fprintf( fp, str ); - close_stream( fp ); - buffer = buffer_read( file ); + num = buffer_new( file, &buffer, size ); - assert( buffer->pos == 0 ); - assert( buffer->use == 0 ); - assert( buffer->end == 5 ); - assert( buffer->eof == TRUE ); + assert( num == 2 ); + assert( buffer->token_pos == 0 ); + assert( buffer->buffer_pos == 0 ); + assert( buffer->buffer_end == 2 ); + assert( buffer->buffer_size == size ); + assert( buffer->eof == FALSE ); + assert( strcmp( buffer->str, "MA" ) == 0 ); - for ( i = 0; str[ i ]; i++ ) { - assert( str[ i ] == buffer->str[ i ] ); - } + buffer->buffer_pos += 2; + + num = buffer_read( &buffer ); + + assert( num == 2 ); + assert( buffer->token_pos == 0 ); + assert( buffer->buffer_pos == 2 ); + assert( buffer->buffer_end == 4 ); + assert( buffer->buffer_size == size ); + assert( buffer->eof == FALSE ); + assert( strcmp( buffer->str, "MART" ) == 0 ); + + buffer->buffer_pos += 2; + + num = buffer_read( &buffer ); + + assert( num == 2 ); + assert( buffer->token_pos == 0 ); + assert( buffer->buffer_pos == 4 ); + assert( buffer->buffer_end == 6 ); + assert( buffer->buffer_size == size ); + assert( buffer->eof == FALSE ); + assert( strcmp( buffer->str, "MARTIN" ) == 0 ); + + buffer->buffer_pos += 2; + + num = buffer_read( &buffer ); + + assert( num == 0 ); + assert( buffer->token_pos == 0 ); + assert( buffer->buffer_pos == 6 ); + assert( buffer->buffer_end == 6 ); + assert( buffer->buffer_size == size ); + assert( buffer->eof == TRUE ); + assert( strcmp( buffer->str, "MARTIN" ) == 0 ); buffer_destroy( &buffer ); buffer = NULL; file_unlink( file ); - fprintf( stderr, "OK\n" ); } @@ -223,27 +299,24 @@ void test_buffer_getc() { fprintf( stderr, " Testing buffer_getc ... " ); - char *file = "/tmp/test_buffer_getc"; - char *str = "MARTIN"; - FILE *fp; - size_t i; - char c; - file_buffer *buffer; + char *file = "/tmp/test_buffer_getc"; + char *str = "MARTIN"; + FILE *fp = NULL; + size_t size = strlen( str ); + size_t i = 0; + char c = 0; + file_buffer *buffer = NULL; fp = write_open( file ); - fprintf( fp, str ); - close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, size ); for ( i = 0; str[ i ]; i++ ) { c = buffer_getc( buffer ); - assert( c != EOF ); - assert( str[ i ] == c ); } @@ -261,12 +334,12 @@ void test_buffer_ungetc() { fprintf( stderr, " Testing buffer_ungetc ... " ); - char *file = "/tmp/test_buffer_ungetc"; - char *str = "MARTIN"; - FILE *fp; - char c; - size_t i; - file_buffer *buffer; + char *file = "/tmp/test_buffer_ungetc"; + char *str = "MARTIN"; + FILE *fp = NULL; + char c = '\0'; + size_t i = 0; + file_buffer *buffer = NULL; fp = write_open( file ); @@ -274,7 +347,7 @@ void test_buffer_ungetc() close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, TEST_SIZE ); c = buffer_getc( buffer ); @@ -313,20 +386,19 @@ void test_buffer_gets() { fprintf( stderr, " Testing buffer_gets ... " ); - char *file = "/tmp/test_buffer_gets"; - char *out = "MARTIN\nASSER\nHANSEN\n"; - FILE *fp; - char *str; - int i; - file_buffer *buffer; + char *file = "/tmp/test_buffer_gets"; + char *out = "MARTIN\nASSER\nHANSEN\n"; + FILE *fp = NULL; + char *str = NULL; + size_t size = 1; + size_t i = 0; + file_buffer *buffer = NULL; fp = write_open( file ); - fprintf( fp, out ); - close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, size ); i = 0; @@ -357,12 +429,13 @@ void test_buffer_ungets() { fprintf( stderr, " Testing buffer_ungets ... " ); - char *file = "/tmp/test_buffer_ungets"; - char *out = "MARTIN\nASSER\nHANSEN\n"; - FILE *fp; - char *str1; - char *str2; - file_buffer *buffer; + char *file = "/tmp/test_buffer_ungets"; + char *out = "MARTIN\nASSER\nHANSEN\n"; + FILE *fp = NULL; + size_t size = 1; + char *str1 = NULL; + char *str2 = NULL; + file_buffer *buffer = NULL; fp = write_open( file ); @@ -370,7 +443,7 @@ void test_buffer_ungets() close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, size ); str1 = buffer_gets( buffer ); @@ -404,22 +477,21 @@ void test_buffer_new_size() { fprintf( stderr, " Testing buffer_new_size ... " ); - char *file = "/tmp/test_buffer_new_size"; - char *str = "X"; - FILE *fp; - file_buffer *buffer; + char *file = "/tmp/test_buffer_new_size"; + char *str = "X"; + size_t size = 1; + FILE *fp = NULL; + file_buffer *buffer = NULL; fp = write_open( file ); - fprintf( fp, str ); - close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, size ); buffer_new_size( buffer, 201048577 ); - assert( buffer->size == 268435456 ); + assert( buffer->buffer_size == 268435456 ); buffer_destroy( &buffer ); @@ -435,25 +507,26 @@ void test_buffer_resize() { fprintf( stderr, " Testing buffer_resize ... " ); - char *file = "/tmp/test_buffer_new_size"; - char *str = "ABC"; - FILE *fp; - char c; - file_buffer *buffer; + char *file = "/tmp/test_buffer_resize"; + char *str = "ABC"; + FILE *fp = NULL; + size_t size = 1; + size_t i = 0; + char c = 0; + file_buffer *buffer = NULL; fp = write_open( file ); - fprintf( fp, str ); - close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, size ); + + i = 0; while ( ( c = buffer_getc( buffer ) ) != EOF ) { - printf( "C: %c\n", c ); - - + assert( c == str[ i ] ); + i++; } buffer_destroy( &buffer ); @@ -468,12 +541,15 @@ void test_buffer_resize() void test_buffer_move() { - fprintf( stderr, " Testing buffer_resize ... " ); + fprintf( stderr, " Testing buffer_move ... " ); - char *file = "/tmp/test_buffer_new_size"; - char *str = "ABCDEFG"; - FILE *fp; - file_buffer *buffer; + char *file = "/tmp/test_buffer_move"; + char *str = "ABCDEFG"; + size_t size = strlen( str ); + size_t new_size = 0; + size_t move = 3; + FILE *fp = NULL; + file_buffer *buffer = NULL; fp = write_open( file ); @@ -481,13 +557,17 @@ void test_buffer_move() close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, size ); - buffer_print( buffer ); + new_size = buffer_move( buffer, size, move ); - buffer_move( buffer, 7, 2 ); + assert( new_size == strlen( buffer->str ) ); + assert( strcmp( buffer->str, "DEFG" ) == 0 ); - buffer_print( buffer ); + new_size = buffer_move( buffer, new_size, move ); + + assert( new_size == strlen( buffer->str ) ); + assert( strcmp( buffer->str, "G" ) == 0 ); buffer_destroy( &buffer ); @@ -503,9 +583,9 @@ void test_buffer_destroy() { fprintf( stderr, " Testing buffer_destroy ... " ); - char *file = "/tmp/test_buffer_destroy"; - char *str = "X"; - FILE *fp; + char *file = "/tmp/test_buffer_destroy"; + char *str = "X"; + FILE *fp = NULL; file_buffer *buffer = NULL; fp = write_open( file ); @@ -514,7 +594,7 @@ void test_buffer_destroy() close_stream( fp ); - buffer = buffer_read( file ); + buffer_new( file, &buffer, TEST_SIZE ); buffer_destroy( &buffer ); @@ -534,9 +614,9 @@ void test_buffer_print() { fprintf( stderr, " Testing buffer_print ... " ); - file_buffer *buffer; + file_buffer *buffer = NULL; - buffer = buffer_read( TEST_FILE ); + buffer_new( TEST_FILE, &buffer, TEST_SIZE ); // buffer_print( buffer ); diff --git a/code_c/Maasha/src/test_all.pl b/code_c/Maasha/src/testall.pl similarity index 100% rename from code_c/Maasha/src/test_all.pl rename to code_c/Maasha/src/testall.pl diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index b5c5325..8fdce8b 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -929,6 +929,8 @@ sub get_options use_score|u visibility|v=s wiggle|w + score|S + log10|L color|c=s chunk_size|C=s ); @@ -5854,7 +5856,9 @@ sub script_upload_to_ucsc $end = $entry->{ 'CHR_END' }; $q_id = $entry->{ 'Q_ID' }; - if ( $q_id =~ /_(\d+)$/ ) { + if ( $options->{ "score" } ) { + $clones = $entry->{ 'SCORE' }; + } if ( $q_id =~ /_(\d+)$/ ) { $clones = $1; } else { $clones = 1; @@ -5890,7 +5894,7 @@ sub script_upload_to_ucsc close $fh_in; - Maasha::UCSC::fixedstep_put_entry( $chr, $beg_block, $block, $fh_out ); + Maasha::UCSC::fixedstep_put_entry( $chr, $beg_block, $block, $fh_out, $options->{ "log10" } ); unlink "$BP_TMP/$chr"; } diff --git a/code_perl/Maasha/Calc.pm b/code_perl/Maasha/Calc.pm index 34959a0..4271182 100644 --- a/code_perl/Maasha/Calc.pm +++ b/code_perl/Maasha/Calc.pm @@ -316,6 +316,21 @@ sub sum } +sub log10 +{ + # Martin A. Hansen, August 2008. + + # Calculate the log10 of a given number. + + my ( $num, # number + ) = @_; + + # Returns a float. + + return log( $num ) / log( 10 ); +} + + sub overlap { # Martin A. Hansen, November 2003. diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 9964d88..83d8842 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -1488,23 +1488,21 @@ sub fixedstep_put_entry $beg, # start position $block, # list of scores $fh, # filehandle - OPTIONAL + $log10, # flag indicating that log10 scores should be used ) = @_; # Returns nothing. $beg += 1; # fixedStep format is 1 based. - if ( $fh ) - { - print $fh "fixedStep chrom=$chr start=$beg step=1\n"; + $fh ||= \*STDOUT; - map { printf( $fh "%d\n", ( $_ + 1 ) ) } @{ $block }; - } - else - { - print "fixedStep chrom=$chr start=$beg step=1\n"; + print $fh "fixedStep chrom=$chr start=$beg step=1\n"; - map { printf( "%d\n", ( $_ + 1 ) ) } @{ $block }; + if ( $log10 ) { + map { printf( $fh "%d\n", Maasha::Calc::log10( $_ + 1 ) ) } @{ $block }; + } else { + map { printf( $fh "%d\n", ( $_ + 1 ) ) } @{ $block }; } } -- 2.39.5