#include <assert.h>
#include <errno.h>
-/* Define a shorthand for unsigned int */
-//typedef uint unsigned int
+typedef char bool;
-/* Define a boolean type */
-#define bool char
#define TRUE 1
#define FALSE 0
/* Absolute value of x. The whole expansion is parenthesized so the macro */
/* keeps its meaning inside larger expressions, e.g. 2 * ABS( x ). */
/* x is expanded more than once - do not pass arguments with side effects. */
#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
/* Cast the complete expression x to int, e.g. INT( a + b ) casts the sum. */
#define INT( x ) ( ( int ) ( x ) )
+/* Neat debug macro. */
+#define DEBUG_EXIT 0
+#define die assert( DEBUG_EXIT )
+
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
};
-/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ERROR HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
-
-
-/* Print error message to stderr and exit. */
-void die( char *error_msg );
-
-/* Print warning message to stderr. */
-void warn( char *warn_msg );
-
-
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
/* Buffer size for FASTA reading ( 256 KiB ); parenthesized so that the */
/* macro expands safely inside expressions such as x / FASTA_BUFFER. */
#define FASTA_BUFFER ( 256 * 1024 )
/* Structure of a sequence entry. */
-struct seq_entry
+struct _seq_entry
{
char *seq_name;
char *seq;
size_t seq_len;
};
-typedef struct seq_entry seq_entry;
+typedef struct _seq_entry seq_entry;
/* Count all entries in a FASTA file given a file pointer. */
size_t fasta_count( FILE *fp );
/* Get next sequence entry from a FASTA file given a file pointer. */
-bool fasta_get_entry( FILE *fp, seq_entry **entry );
+bool fasta_get_entry( file_buffer *buffer, seq_entry **entry );
/* Output a sequence entry in FASTA format. */
void fasta_put_entry( seq_entry *entry );
void fasta_put_entries( struct list *entries );
/* Deallocates memory from a seq_entry. */
-void fasta_free_entry( struct seq_entry *entry );
+void fasta_free_entry( seq_entry *entry );
-//#define FILE_BUFFER_SIZE 64 * 1024
-#define FILE_BUFFER_SIZE 1024 * 1024
+/* Default file buffer size ( 64 KiB ); parenthesized so that the macro */
+/* expands safely inside expressions such as x / FILE_BUFFER_SIZE. */
+#define FILE_BUFFER_SIZE ( 64 * 1024 )
+//#define FILE_BUFFER_SIZE 1
-struct file_buffer
+struct _file_buffer
{
FILE *fp; /* file pointer */
char *str; /* the buffer string */
- size_t pos; /* index pointing to last position where some token was found */
- size_t use; /* index indicating how much of the buffer is scanned */
- size_t end; /* end position of buffer */
- size_t size; /* default buffer size */
- bool eof; /* flag indicating that buffer reached EOF */
+ size_t pos; /* index pointing to last position where some token was found */
+ size_t len; /* length of some found token */
+ size_t use; /* index indicating how much of the buffer is scanned */
+ size_t end; /* end position of buffer */
+ long size; /* default buffer size */
+ bool eof; /* flag indicating that buffer reached EOF */
};
+typedef struct _file_buffer file_buffer;
+
/* Read-open a file and return a file pointer. */
FILE *read_open( char *file );
/* Opens a file for reading and loads a new buffer.*/
-struct file_buffer *read_open_buffer( char *file );
+file_buffer *buffer_read( char *file );
/* Get the next char from a file buffer, which is resized if necessary, until EOF.*/
-char buffer_getc( struct file_buffer *buffer );
+char buffer_getc( file_buffer *buffer );
+
+/* Rewinds the file buffer one char, i.e. put one char back on the buffer. */
+void buffer_ungetc( file_buffer *buffer );
/* Get the next line that is terminated by \n or EOF from a file buffer. */
-char *buffer_gets( struct file_buffer *buffer );
+char *buffer_gets( file_buffer *buffer );
-/* Increases buffer size until it is larger than len. */
-void buffer_new_size( struct file_buffer *buffer, int len );
+/* Rewind the file buffer one line, i.e. put one line back on the buffer. */
+void buffer_ungets( file_buffer *buffer );
+
+/* Doubles buffer size until it is larger than len. */
+void buffer_new_size( file_buffer *buffer, long len );
/* Resize file buffer discarding any old buffer before offset, */
/* and merge remaining old buffer with a new chunk of buffer. */
-void buffer_resize( struct file_buffer *buffer );
+void buffer_resize( file_buffer *buffer );
+
+/* Moves file buffer of a given size num positions to the left. */
+void buffer_move( file_buffer *buffer, size_t size, size_t num );
/* Deallocates memory and close stream used by file buffer. */
-void buffer_destroy( struct file_buffer *buffer );
+void buffer_destroy( file_buffer **buffer );
/* Debug function that prints the content of a file_buffer. */
-void buffer_print( struct file_buffer *buffer );
+void buffer_print( file_buffer *buffer );
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
#include "mem.h"
-/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ERROR HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
-
-
-void die( char *msg )
-{
- /* Martin A. Hansen, May 2008 */
-
- /* Print error message and exits. */
-
- fprintf( stderr, "ERROR: %s\n", msg );
-
- exit( 1 );
-}
-
-
-void warn( char *msg )
-{
- /* Martin A. Hansen, May 2008 */
-
- /* Print warning message and exits. */
-
- fprintf( stderr, "WARNING: %s\n", msg );
-}
-
-
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
#include "common.h"
#include "mem.h"
+#include "filesys.h"
#include "fasta.h"
#include "list.h"
}
-bool fasta_get_entry( FILE *fp, seq_entry **entry )
+bool fasta_get_entry( file_buffer *buffer, seq_entry **entry )
{
- /* Martin A. Hansen, May 2008 */
+ /* Martin A. Hansen, August 2008 */
- /* Get next sequence entry from a FASTA file given a file pointer. */
+ /* Get next sequence entry from a FASTA file given a file buffer. */
+
+ /* Lines are skipped until a header line starting with '>' is found; the */
+ /* header text without '>' and without the last char of the line becomes */
+ /* seq_name. The chars with codes 33 - 126 of the following lines are */
+ /* collected in seq until the next header line - which is put back on the */
+ /* buffer - or until EOF. */
+
+ /* Returns TRUE when *entry was filled in, and FALSE if no header line was */
+ /* found before EOF or if the entry holds no sequence chars. */
+
+ /* NOTE(review): the lines returned by buffer_gets, and seq_name / seq on */
+ /* the FALSE return path, are not freed here - confirm ownership. */
- size_t i;
- size_t j;
- size_t offset;
+ char *line;
+ size_t seq_name_len;
size_t seq_len;
- char buffer[ FASTA_BUFFER ];
- size_t buffer_len;
- char *seq_name = NULL;
- char *seq = NULL;
-
- offset = ftell( fp );
-
- /* ---- Skip ahead until header line and include header ---- */
+ char *seq_name;
+ char *seq;
+ size_t i;
- while ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+ while ( 1 )
{
- buffer_len = strlen( buffer );
-
- offset += buffer_len;
-
- if ( ( buffer[ 0 ] == '>' ) )
+ if ( ( line = buffer_gets( buffer ) ) != NULL )
{
- seq_name = mem_get( buffer_len - 1 );
-
- for ( i = 1; i < buffer_len - 1; i++ ) {
- seq_name[ i - 1 ] = buffer[ i ];
- }
+ if ( line[ 0 ] == '>' )
+ {
+ /* A header line of a single char must not wrap seq_name_len around. */
+ seq_name_len = ( buffer->len >= 2 ) ? buffer->len - 2 : 0;
+ seq_name = mem_get( seq_name_len + 1 );
- seq_name[ i ] = '\0';
+ memcpy( seq_name, &line[ 1 ], seq_name_len );
+
+ /* memcpy does not terminate the copied name. */
+ seq_name[ seq_name_len ] = '\0';
- break;
+ break;
+ }
}
+ else
+ {
+ /* No more lines: without this the loop would never end at EOF. */
+ return FALSE;
+ }
}
- /* ---- Determine length of sequence ---- */
+ seq = mem_get( 1 );
seq_len = 0;
- while ( ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) )
+ while ( 1 )
{
- for ( i = 0; buffer[ i ]; i++ )
+ if ( ( line = buffer_gets( buffer ) ) != NULL )
{
- if ( buffer[ i ] > 32 && buffer[ i ] < 127 ) {
- seq_len++;
+ if ( line[ 0 ] == '>' )
+ {
+ buffer_ungets( buffer );
+
+ break;
}
- }
+ else
+ {
+ /* Keep the pointer returned by mem_resize, and leave room for the */
+ /* terminating '\0' written after the loop. */
+ seq = mem_resize( seq, seq_len + strlen( line ) + 1 );
+
+ for ( i = 0; line[ i ]; i++ )
+ {
+ if ( line[ i ] > 32 && line[ i ] < 127 )
+ {
+ seq[ seq_len ] = line[ i ];
- if ( ( buffer[ 0 ] == '>' ) )
+ seq_len++;
+ }
+ }
+ }
+ }
+ else
{
- seq_len -= strlen( buffer ) - 1;
-
break;
}
}
- /* ---- Allocate memory for sequence ---- */
-
- seq = mem_get( seq_len + 1 );
-
- /* ---- Rewind file pointer and read sequence ---- */
-
- if ( fseek( fp, offset, SEEK_SET ) != 0 )
+ if ( seq_len == 0 )
{
- fprintf( stderr, "ERROR: fseek SEEK_SET failed: %s\n", strerror( errno ) );
- abort();
+ return FALSE;
}
+// seq = mem_resize( seq, seq_len + 1 );
- j = 0;
-
- while ( ( fgets( buffer, sizeof( buffer ), fp ) != NULL ) )
- {
- for ( i = 0; buffer[ i ]; i++ )
- {
- if ( buffer[ i ] > 32 && buffer[ i ] < 127 )
- {
- seq[ j ] = buffer[ i ];
-
- if ( j == seq_len - 1 )
- {
- seq[ j + 1 ] = '\0';
+ /* seq holds seq_len chars at indices 0 .. seq_len - 1. */
+ seq[ seq_len ] = '\0';
- ( *entry )->seq_name = seq_name;
- ( *entry )->seq = seq;
- ( *entry )->seq_len = seq_len;
+// should probably use memcpy below
- return TRUE;
- }
-
- j++;
- }
- }
- }
+ ( *entry )->seq_name = seq_name;
+ ( *entry )->seq = seq;
+ ( *entry )->seq_len = seq_len;
- return FALSE;
+ /* An entry was read; callers loop while the return value is not FALSE. */
+ return TRUE;
}
-void fasta_put_entry( struct seq_entry *entry )
+//bool fasta_get_entry( FILE *fp, seq_entry **entry )
+//{
+// /* Martin A. Hansen, May 2008 */
+//
+// /* Unit test done.*/
+//
+// /* Get next sequence entry from a FASTA file given a file pointer. */
+//
+// size_t i;
+// size_t offset;
+// size_t seq_buffer_len;
+// size_t seq_len;
+// size_t buffer_read;
+// char buffer[ FASTA_BUFFER ];
+// size_t buffer_len;
+// char *seq_name = NULL;
+// char *seq = NULL;
+//
+// offset = ftell( fp );
+//
+// /* ---- Skip ahead until header line and include header ---- */
+//
+// while ( 1 )
+// {
+// if ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+// {
+// buffer_len = strlen( buffer );
+//
+// offset += buffer_len;
+//
+// if ( ( buffer[ 0 ] == '>' ) )
+// {
+// seq_name = mem_get( buffer_len - 1 );
+//
+// memcpy( seq_name, &buffer[ 1 ], buffer_len - 2 );
+//
+// seq_name[ buffer_len - 2 ] = '\0';
+//
+// break;
+// }
+// }
+// else
+// {
+// if ( ferror( fp ) != 0 )
+// {
+// fprintf( stderr, "ERROR: get_fasta_seq failed: %s\n", strerror( errno ) );
+// abort();
+// }
+// else if ( feof( fp ) != 0 )
+// {
+// return FALSE;
+// }
+// }
+// }
+//
+// /* ---- Determine approximate length of sequence ---- */
+//
+// seq_buffer_len = 0;
+//
+// while ( 1 )
+// {
+// if ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+// {
+// if ( ( buffer[ 0 ] == '>' ) )
+// {
+// assert( seq_buffer_len != 0 );
+//
+// break;
+// }
+// else
+// {
+// seq_buffer_len += strlen( buffer );
+// }
+// }
+// else
+// {
+// if ( ferror( fp ) != 0 )
+// {
+// fprintf( stderr, "ERROR: get_fasta_seq failed: %s\n", strerror( errno ) );
+// abort();
+// }
+// else if ( feof( fp ) != 0 )
+// {
+// break;
+// }
+// }
+// }
+//
+// /* ---- Allocate approximate memory for sequence ---- */
+//
+// seq = mem_get( seq_buffer_len + 1 );
+//
+// /* ---- Rewind file pointer and read sequence ---- */
+//
+// if ( fseek( fp, offset, SEEK_SET ) != 0 )
+// {
+// fprintf( stderr, "ERROR: fseek SEEK_SET failed: %s\n", strerror( errno ) );
+// abort();
+// }
+//
+// buffer_read = 0;
+// seq_len = 0;
+//
+// while ( buffer_read < seq_buffer_len )
+// {
+// if ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+// {
+// for ( i = 0; buffer[ i ]; i++ )
+// {
+// if ( buffer[ i ] > 32 && buffer[ i ] < 127 )
+// {
+// seq[ seq_len ] = buffer[ i ];
+//
+// seq_len++;
+// }
+// }
+//
+// buffer_read += i;
+// }
+// else
+// {
+// if ( ferror( fp ) != 0 )
+// {
+// fprintf( stderr, "ERROR: get_fasta_seq failed: %s\n", strerror( errno ) );
+// abort();
+// }
+// else if ( feof( fp ) != 0 )
+// {
+// fprintf( stderr, "ERROR: get_fasta_seq failed: EOF\n" );
+// abort();
+// }
+// }
+// }
+//
+//// seq = mem_resize( seq, seq_len + 1 );
+//
+// seq[ seq_len + 1 ] = '\0';
+//
+// ( *entry )->seq_name = seq_name;
+// ( *entry )->seq = seq;
+// ( *entry )->seq_len = seq_len;
+//
+// return TRUE;
+//}
+
+
+void fasta_put_entry( seq_entry *entry )
{
/* Martin A. Hansen, May 2008 */
}
-void fasta_free_entry( struct seq_entry *entry )
+void fasta_free_entry( seq_entry *entry )
{
/* Martin A. Hansen, June 2008 */
fprintf( stderr, "ERROR: file_read failed\n" );
abort();
}
- else if ( feof( fp ) )
- {
- fprintf( stderr, "ERROR: file_read failed - end-of-file reached\n" );
-
- abort();
- }
string[ len ] = '\0';
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FILE BUFFER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
-struct file_buffer *read_open_buffer( char *file )
+file_buffer *buffer_read( char *file )
{
/* Martin A. Hansen, June 2008 */
/* Opens a file for reading and loads a new buffer.*/
- struct file_buffer *buffer;
- FILE *fp;
- char *str;
- bool eof;
+ file_buffer *buffer;
+ FILE *fp;
+ char *str;
+ bool eof;
- buffer = mem_get( sizeof( buffer ) );
+ buffer = mem_get( sizeof( file_buffer ) );
fp = read_open( file );
buffer->fp = fp;
buffer->str = str;
buffer->pos = 0;
+ buffer->len = 0;
buffer->use = 0;
- buffer->end = strlen( str );
+ buffer->end = strlen( str ) - 1;
buffer->size = FILE_BUFFER_SIZE;
buffer->eof = eof;
}
-char buffer_getc( struct file_buffer *buffer )
+char buffer_getc( file_buffer *buffer )
{
/* Martin A. Hansen, June 2008 */
while ( 1 )
{
- if ( buffer->use == buffer->end )
+ if ( buffer->use == buffer->end + 1 )
{
if ( buffer->eof )
{
- return '\0';
+ return EOF;
}
else
{
}
-char *buffer_gets( struct file_buffer *buffer )
+void buffer_ungetc( file_buffer *buffer )
+{
+ /* Martin A. Hansen, August 2008. */
+
+ /* Rewinds the file buffer one char, */
+ /* i.e. put one char back on the buffer. */
+
+ /* Only the scan index buffer->use is changed; the buffer string itself */
+ /* is left untouched. */
+
+ /* With use at 0 there is nothing to put back, and the unsigned index */
+ /* would wrap around on decrement. */
+ assert( buffer->use > 0 );
+
+ buffer->use--;
+}
+
+
+char *buffer_gets( file_buffer *buffer )
{
/* Martin A. Hansen, June 2008 */
while ( 1 )
{
- if ( ( pt = memchr( &buffer->str[ buffer->use ], '\n', buffer->end - buffer->use ) ) != NULL )
+ if ( ( pt = memchr( &buffer->str[ buffer->use ], '\n', buffer->end + 1 - buffer->use ) ) != NULL )
{
line_size = pt - &buffer->str[ buffer->use ] + 1;
- line = mem_get( line_size );
+ line = mem_get( line_size + 1 );
memcpy( line, &buffer->str[ buffer->use ], line_size );
line[ line_size ] = '\0';
+ buffer->len = line_size;
buffer->use += line_size;
buffer_new_size( buffer, line_size );
}
else
{
- if ( buffer->eof ) {
- return NULL;
- } else {
+ if ( buffer->eof )
+ {
+ if ( buffer->use < buffer->end )
+ {
+ line_size = buffer->end - buffer->use + 1;
+
+ line = mem_get( line_size + 1 );
+
+ memcpy( line, &buffer->str[ buffer->use ], line_size );
+
+ line[ line_size ] = '\0';
+
+ buffer->len = line_size;
+ buffer->use += line_size;
+
+ return line;
+ }
+ else
+ {
+ return NULL;
+ }
+ }
+ else
+ {
buffer_resize( buffer );
}
}
}
-void buffer_new_size( struct file_buffer *buffer, int len )
+void buffer_ungets( file_buffer *buffer )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Rewind the file buffer one line, */
+ /* i.e. put one line back on the buffer. */
+
+ /* The scan index buffer->use is moved back by buffer->len, the length */
+ /* stored by buffer_gets for the line it returned. */
+
+ /* Guards the unsigned subtraction below against wrapping around. */
+ assert( buffer->use >= buffer->len );
+
+ buffer->use -= buffer->len;
+}
+
+
+void buffer_new_size( file_buffer *buffer, long len )
{
/* Martin A. Hansen, June 2008 */
- /* Increases buffer size until it is larger than len. */
+ /* Doubles buffer size until it is larger than len. */
- while ( buffer->size < len )
+ while ( buffer->size <= len )
{
buffer->size <<= 1;
- if ( buffer->size <= 0 ) {
- die( "buffer_new_size failed." );
+ printf( "SIZE: %ld\n", buffer->size );
+
+ if ( buffer->size <= 0 )
+ {
+ fprintf( stderr, "ERROR: buffer_new_size failed.\n" );
+ abort();
}
}
}
-void buffer_resize( struct file_buffer *buffer )
+void buffer_resize( file_buffer *buffer )
{
/* Martin A. Hansen, June 2008 */
size_t str_len;
size_t new_end;
+ buffer_print( buffer );
str = file_read( buffer->fp, buffer->size );
+ printf( "STR: %s\n", str );
+
str_len = strlen( str );
+ printf( "STR_LEN: %zu\n", str_len );
+
feof( buffer->fp ) ? ( buffer->eof = TRUE ) : ( buffer->eof = FALSE );
+ printf( "EOF: %i\n", buffer->eof );
+
if ( buffer->pos != 0 )
{
+ assert( buffer->end >= buffer->pos );
+ assert( ( buffer->use - buffer->pos ) != 0 );
memmove( buffer->str, &buffer->str[ buffer->pos ], buffer->use - buffer->pos );
buffer->end -= buffer->pos;
new_end = buffer->end + str_len;
+ printf( "END: %zu\n", buffer->end );
+ printf( "NEW_END: %zu\n", new_end );
+
buffer->str = mem_resize( buffer->str, new_end + 1 );
memcpy( &buffer->str[ buffer->end ], str, str_len );
- buffer->str[ new_end + 1 ] = '\0';
-
+ buffer->str[ str_len ] = '\0';
buffer->end = new_end;
+ buffer_print( buffer );
+ die;
mem_free( ( void * ) &str );
}
-void buffer_destroy( struct file_buffer *buffer )
+void buffer_move( file_buffer *buffer, size_t size, size_t num )
+{
+ /* Martin A. Hansen, August 2008 */
+
+ /* Moves file buffer of a given size num positions to the left. */
+
+ /* size bytes starting at index num are copied to the start of the buffer */
+ /* string; memmove is used because source and destination may overlap. */
+ /* NOTE(review): the bytes read are num .. num + size - 1, so callers must */
+ /* keep num + size within the allocated buffer string - confirm. */
+ memmove( buffer->str, &buffer->str[ num ], size );
+
+ /* The end index follows the moved data; the scan and token indices are */
+ /* reset to the start of the buffer. */
+ buffer->end -= num;
+ buffer->use = 0;
+ buffer->pos = 0;
+}
+
+
+void buffer_destroy( file_buffer **buffer )
{
/* Martin A. Hansen, June 2008 */
/* Deallocates memory and close stream used by file buffer. */
+ /* buffer is the address of the caller's file_buffer pointer; *buffer */
+ /* must not be NULL. */
- close_stream( buffer->fp );
+ file_buffer *pt = *buffer;
+
+ assert( pt != NULL );
+
+ close_stream( pt->fp );
- mem_free( ( void * ) &buffer->str );
- mem_free( ( void * ) &buffer );
+ /* NOTE(review): mem_free is handed the address of the local copy pt, and */
+ /* *buffer is never assigned here, so the caller's pointer is left as it */
+ /* was and should not be dereferenced afterwards - confirm with mem_free. */
+ mem_free( ( void * ) &pt->str );
+ mem_free( ( void * ) &pt );
}
-void buffer_print( struct file_buffer *buffer )
+void buffer_print( file_buffer *buffer )
{
/* Martin A. Hansen, June 2008 */
/* Debug function that prints the content of a file_buffer. */
- printf( "buffer: {\n" );
- printf( " pos : %lu\n", buffer->pos );
- printf( " use : %lu\n", buffer->use );
- printf( " end : %lu\n", buffer->end );
+ printf( "\nbuffer: {\n" );
+ printf( " pos : %zu\n", buffer->pos );
+ printf( " len : %zu\n", buffer->len );
+ printf( " use : %zu\n", buffer->use );
+ printf( " end : %zu\n", buffer->end );
+ printf( " size : %ld\n", buffer->size );
printf( " eof : %d\n", buffer->eof );
printf( " str : ->%s<-\n", buffer->str );
- printf( " str_len: %lu\n", strlen( buffer->str ) );
+ printf( " str_len: %zu\n", strlen( buffer->str ) );
printf( "}\n" );
}
} else if ( is_rna( seq ) ) {
complement_rna( seq );
} else {
- die( "Complement nuc failed.\n" );
+ abort();
}
}
} else if ( is_protein( seq ) ) {
type = "PROTEIN";
} else {
- die( "Could not guess sequence type.\n" );
+ abort();
}
return type;
int bin;
if ( strlen( oligo ) > 15 ) {
- die( "Oligo will not fit in an integer." );
+ abort();
}
bin = 0;
case 'U': case 'u': bin |= 1; break;
case 'C': case 'c': bin |= 2; break;
case 'G': case 'g': bin |= 3; break;
- default: die( "Unrecognized nucleotide." );
+ default: abort();
}
}
if ( ( new_line = memchr( string, '\n', 1024 ) ) != NULL ) {
new_line_pos = new_line - string;
} else {
- die( "bed_split: no newline found." );
+ abort();
}
field_num = 0;
}
else
{
- die( "bed_split: no tab found." );
+ abort();
}
offset += pos + 1;
}
if ( pt == NULL ) {
- die( "bed parse failed." );
+ abort();
}
offset = field_seps[ i ] + 1;
array = oligo_count( path );
- oligo_count_output( path, array );
+ //oligo_count_output( path, array );
return 0;
}
/* Count the occurrence of all oligos of a fixed size in a FASTA file. */
- uint *array = NULL;
- uint i = 0;
- uint mask = 0;
- uint bin = 0;
- uint bin_rc1 = 0;
- uint bin_rc2 = 0;
- uint j = 0;
- uint A_rc = ( 3 << ( UINT_BITS - 2 ) ); /* 11 on the leftmost two bits an uint. */
- uint G_rc = ( 2 << ( UINT_BITS - 2 ) ); /* 10 on the leftmost two bits an uint. */
- uint C_rc = ( 1 << ( UINT_BITS - 2 ) ); /* 01 on the leftmost two bits an uint. */
- struct seq_entry *entry = NULL;
- FILE *fp = NULL;
+ uint *array = NULL;
+ uint i = 0;
+ uint mask = 0;
+ uint bin = 0;
+ uint bin_rc1 = 0;
+ uint bin_rc2 = 0;
+ uint j = 0;
+ uint A_rc = ( 3 << ( UINT_BITS - 2 ) ); /* 11 on the leftmost two bits an uint. */
+ uint G_rc = ( 2 << ( UINT_BITS - 2 ) ); /* 10 on the leftmost two bits an uint. */
+ uint C_rc = ( 1 << ( UINT_BITS - 2 ) ); /* 01 on the leftmost two bits an uint. */
+ seq_entry *entry = NULL;
+ FILE *fp = NULL;
+ file_buffer *buffer = NULL;
array = mem_get_zero( sizeof( uint ) * SIZE );
fp = read_open( path );
- while ( ( fasta_get_entry( fp, entry ) ) )
+ while ( ( fasta_get_entry( buffer, &entry ) ) )
{
fprintf( stderr, "Counting oligos in: %s ... ", entry->seq_name );
}
-void oligo_count_output( char *path, uint *array )
-{
- /* Martin A. Hansen, June 2008 */
-
- /* Output oligo count for each sequence position. */
-
- struct seq_entry *entry;
- FILE *fp;
- uint mask;
- uint i;
- uint j;
- uint bin;
- int count;
- uint *block;
- uint block_pos;
- uint block_beg;
- uint block_size;
- uint chr_pos;
-
- mask = mask_create( OLIGO_SIZE );
-
- entry = mem_get( sizeof( entry ) );
-
- fp = read_open( path );
-
- while ( ( fasta_get_entry( fp, entry ) ) )
- {
- fprintf( stderr, "Writing results for: %s ... ", entry->seq_name );
-
- bin = 0;
- j = 0;
- block_pos = 0;
- block_size = sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE );
- block = mem_get_zero( block_size );
-
- for ( i = 0; entry->seq[ i ]; i++ )
- {
- bin <<= 2;
-
- switch( entry->seq[ i ] )
- {
- case 'A': case 'a': j++; break;
- case 'T': case 't': bin |= T; j++; break;
- case 'C': case 'c': bin |= C; j++; break;
- case 'G': case 'g': bin |= G; j++; break;
- default: bin = 0; j = 0; break;
- }
-
- if ( j >= OLIGO_SIZE )
- {
- count = array[ ( bin & mask ) ];
-
- if ( count > 1 )
- {
- chr_pos = i - OLIGO_SIZE + 1;
-
- if ( block_pos == 0 )
- {
- memset( block, '\0', block_size );
-
- block_beg = chr_pos;
-
- block[ block_pos ] = count;
-
- block_pos++;
- }
- else
- {
- if ( chr_pos > block_beg + block_pos )
- {
- fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
-
- block_pos = 0;
- }
- else
- {
- block[ block_pos ] = count;
-
- block_pos++;
- }
- }
- }
- }
- }
-
- if ( block_pos > 0 )
- {
- fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
-
- mem_free( ( void * ) &block );
- }
-
- fprintf( stderr, "done.\n" );
- }
-
- close_stream( fp );
-
- fasta_free_entry( entry );
-}
+//void oligo_count_output( char *path, uint *array )
+//{
+// /* Martin A. Hansen, June 2008 */
+//
+// /* Output oligo count for each sequence position. */
+//
+// struct seq_entry *entry;
+// FILE *fp;
+// uint mask;
+// uint i;
+// uint j;
+// uint bin;
+// int count;
+// uint *block;
+// uint block_pos;
+// uint block_beg;
+// uint block_size;
+// uint chr_pos;
+// file_buffer *buffer;
+//
+// mask = mask_create( OLIGO_SIZE );
+//
+// entry = mem_get( sizeof( entry ) );
+//
+// fp = read_open( path );
+//
+// while ( ( fasta_get_entry( buffer, &entry ) ) )
+// {
+// fprintf( stderr, "Writing results for: %s ... ", entry->seq_name );
+//
+// bin = 0;
+// j = 0;
+// block_pos = 0;
+// block_size = sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE );
+// block = mem_get_zero( block_size );
+//
+// for ( i = 0; entry->seq[ i ]; i++ )
+// {
+// bin <<= 2;
+//
+// switch( entry->seq[ i ] )
+// {
+// case 'A': case 'a': j++; break;
+// case 'T': case 't': bin |= T; j++; break;
+// case 'C': case 'c': bin |= C; j++; break;
+// case 'G': case 'g': bin |= G; j++; break;
+// default: bin = 0; j = 0; break;
+// }
+//
+// if ( j >= OLIGO_SIZE )
+// {
+// count = array[ ( bin & mask ) ];
+//
+// if ( count > 1 )
+// {
+// chr_pos = i - OLIGO_SIZE + 1;
+//
+// if ( block_pos == 0 )
+// {
+// memset( block, '\0', block_size );
+//
+// block_beg = chr_pos;
+//
+// block[ block_pos ] = count;
+//
+// block_pos++;
+// }
+// else
+// {
+// if ( chr_pos > block_beg + block_pos )
+// {
+// fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
+//
+// block_pos = 0;
+// }
+// else
+// {
+// block[ block_pos ] = count;
+//
+// block_pos++;
+// }
+// }
+// }
+// }
+// }
+//
+// if ( block_pos > 0 )
+// {
+// fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
+//
+// mem_free( ( void * ) &block );
+// }
+//
+// fprintf( stderr, "done.\n" );
+// }
+//
+// close_stream( fp );
+//
+// fasta_free_entry( entry );
+//}
void fixedstep_put_entry( char *chr, int beg, int step_size, uint *block_array, int block_size )
all: test
-test: test_fasta test_filesys test_mem test_strings
+test: test_common test_fasta test_filesys test_mem test_strings
+
+test_common: test_common.c $(LIB_DIR)common.c
+ $(CC) $(Cflags) $(INC) $(LIB) test_common.c -o test_common
test_fasta: test_fasta.c $(LIB_DIR)fasta.c
$(CC) $(Cflags) $(INC) $(LIB) test_fasta.c -o test_fasta
$(CC) $(Cflags) $(INC) $(LIB) test_strings.c -o test_strings
clean:
+ rm test_common
rm test_fasta
rm test_filesys
rm test_mem
--- /dev/null
+#include "common.h"
+
+static void test_true();
+static void test_bool();
+
+int main()
+{
+ fprintf( stderr, "Running all tests for common.c\n" );
+
+ test_true();
+ test_bool();
+
+ fprintf( stderr, "Done\n\n" );
+
+ return EXIT_SUCCESS;
+}
+
+
+void test_true()
+{
+ fprintf( stderr, " Testing true ... " );
+
+ assert( TRUE == 1 );
+ assert( FALSE == 0 );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_bool()
+{
+ fprintf( stderr, " Testing bool ... " );
+
+ bool answer;
+
+ answer = TRUE;
+
+ assert( answer == TRUE );
+ assert( answer == 1 );
+
+ answer = FALSE;
+
+ assert( answer == FALSE );
+ assert( answer == 0 );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
#include "mem.h"
#include "fasta.h"
-#define TEST_FILE "test/test_files/test.fna"
+#define TEST_FILE1 "test/test_files/test.fna"
+#define TEST_FILE2 "/Users/m.hansen/DATA/genomes/hg18/hg18.fna"
+#define TEST_COUNT 10
static void test_fasta_get_entry();
+
int main()
{
fprintf( stderr, "Running all tests for fasta.c\n" );
{
fprintf( stderr, " Testing fasta_get_entry ... " );
- FILE *fp;
+ file_buffer *buffer;
seq_entry *entry;
- fp = read_open( TEST_FILE );
+ buffer = buffer_read( TEST_FILE1 );
- entry = mem_get( sizeof( entry ) );
+ entry = mem_get( sizeof( seq_entry ) );
- if ( fasta_get_entry( fp, &entry ) != FALSE )
+ while ( fasta_get_entry( buffer, &entry ) != FALSE )
{
- assert( strlen( entry->seq_name ) == 5 );
- assert( strlen( entry->seq ) == 60 );
- assert( entry->seq_len == 60 );
- assert( strlen( entry->seq ) == entry->seq_len );
+// assert( strlen( entry->seq ) == entry->seq_len );
+
+// printf( "%s\t%zu\n", entry->seq_name, entry->seq_len );
+
+// free( entry->seq_name );
+// free( entry->seq );
}
- close_stream( fp );
+ buffer_destroy( &buffer );
+
+// mem_free( ( void * ) buffer );
fprintf( stderr, "OK\n" );
}
#include "common.h"
#include "filesys.h"
+//#define TEST_FILE "/Users/m.hansen/DATA/genomes/hg18/hg18.fna"
+#define TEST_FILE "test/test_files/test.fna"
+
static void test_read_open();
static void test_write_open();
static void test_append_open();
static void test_file_read();
static void test_file_unlink();
static void test_file_rename();
+static void test_buffer_read();
+static void test_buffer_getc();
+static void test_buffer_ungetc();
+static void test_buffer_gets();
+static void test_buffer_ungets();
+static void test_buffer_new_size();
+static void test_buffer_resize();
+static void test_buffer_move();
+static void test_buffer_destroy();
+static void test_buffer_print();
int main()
test_file_unlink();
test_file_rename();
+ test_buffer_move();
+ test_buffer_resize();
+
+ test_buffer_read();
+ test_buffer_getc();
+ test_buffer_ungetc();
+ test_buffer_gets();
+ test_buffer_ungets();
+ test_buffer_new_size();
+ test_buffer_destroy();
+ test_buffer_print();
+
fprintf( stderr, "Done\n\n" );
return EXIT_SUCCESS;
void test_read_open()
{
- FILE *fp;
-
fprintf( stderr, " Testing read_open ... " );
+ FILE *fp;
+
// fp = read_open( "/tmp/asdf" );
// fp = read_open( "/private/etc/ssh_host_rsa_key" );
fp = read_open( "/dev/null" );
void test_write_open()
{
- FILE *fp;
-
fprintf( stderr, " Testing write_open ... " );
+ FILE *fp;
+
// fp = write_open( "/tmp/asdf" );
// fp = write_open( "/private/etc/ssh_host_rsa_key" );
fp = write_open( "/dev/null" );
void test_append_open()
{
- FILE *fp;
-
fprintf( stderr, " Testing append_open ... " );
+ FILE *fp;
+
//fp = append_open( "/tmp/asdf" );
//fp = append_open( "/private/etc/ssh_host_rsa_key" );
fp = append_open( "/dev/null" );
void test_close_stream()
{
- FILE *fp;
-
fprintf( stderr, " Testing close_stream ... " );
+ FILE *fp;
+
fp = read_open( "/dev/null" );
close_stream( fp );
void test_file_read()
{
+ fprintf( stderr, " Testing file_read ... " );
+
char *test_file = "/etc/passwd";
char *buffer;
FILE *fp;
size_t len = 1000;
- fprintf( stderr, " Testing file_read ... " );
-
fp = read_open( test_file );
buffer = file_read( fp, len );
void test_file_unlink()
{
+ fprintf( stderr, " Testing file_unlink ... " );
+
char *test_file = "/tmp/test";
FILE *fp;
- fprintf( stderr, " Testing file_unlink ... " );
-
fp = write_open( test_file );
close_stream( fp );
void test_file_rename()
{
+ fprintf( stderr, " Testing file_rename ... " );
+
char *file_before = "/tmp/before";
char *file_after = "/tmp/after";
FILE *fp;
-
- fprintf( stderr, " Testing file_rename ... " );
fp = write_open( file_before );
fprintf( stderr, "OK\n" );
}
+
+
+void test_buffer_read()
+{
+ fprintf( stderr, " Testing buffer_read ... " );
+
+ char *file = "/tmp/test_buffer_read";
+ char *str = "MARTIN";
+ FILE *fp;
+ size_t i;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, str );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ assert( buffer->pos == 0 );
+ assert( buffer->use == 0 );
+ assert( buffer->end == 5 );
+ assert( buffer->eof == TRUE );
+
+ for ( i = 0; str[ i ]; i++ ) {
+ assert( str[ i ] == buffer->str[ i ] );
+ }
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_getc()
+{
+ fprintf( stderr, " Testing buffer_getc ... " );
+
+ char *file = "/tmp/test_buffer_getc";
+ char *str = "MARTIN";
+ FILE *fp;
+ size_t i;
+ char c;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, str );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ for ( i = 0; str[ i ]; i++ )
+ {
+ c = buffer_getc( buffer );
+
+ assert( c != EOF );
+
+ assert( str[ i ] == c );
+ }
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_ungetc()
+{
+ fprintf( stderr, " Testing buffer_ungetc ... " );
+
+ char *file = "/tmp/test_buffer_ungetc";
+ char *str = "MARTIN";
+ FILE *fp;
+ char c;
+ size_t i;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, str );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ c = buffer_getc( buffer );
+
+ assert( c == 'M' );
+
+ buffer_ungetc( buffer );
+
+ i = 0;
+
+ while ( ( c = buffer_getc( buffer ) ) != EOF )
+ {
+ assert( c == str[ i ] );
+
+ i++;
+ }
+
+ assert( c == EOF );
+
+ buffer_ungetc( buffer );
+
+ c = buffer_getc( buffer );
+
+ assert( c == 'N' );
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_gets()
+{
+ fprintf( stderr, " Testing buffer_gets ... " );
+
+ char *file = "/tmp/test_buffer_gets";
+ char *out = "MARTIN\nASSER\nHANSEN\n";
+ FILE *fp;
+ char *str;
+ int i;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, out );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ i = 0;
+
+ while( ( str = buffer_gets( buffer ) ) != NULL )
+ {
+ if ( i == 0 ) {
+ assert( strcmp( str, "MARTIN\n" ) == 0 );
+ } else if ( i == 1 ) {
+ assert( strcmp( str, "ASSER\n" ) == 0 );
+ } else if ( i == 2 ) {
+ assert( strcmp( str, "HANSEN\n" ) == 0 );
+ }
+
+ i++;
+ }
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_ungets()
+{
+ fprintf( stderr, " Testing buffer_ungets ... " );
+
+ char *file = "/tmp/test_buffer_ungets";
+ char *out = "MARTIN\nASSER\nHANSEN\n";
+ FILE *fp;
+ char *str1;
+ char *str2;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, out );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ str1 = buffer_gets( buffer );
+
+ buffer_ungets( buffer );
+
+ str2 = buffer_gets( buffer );
+
+ assert( strcmp( str1, str2 ) == 0 );
+
+ while ( ( str1 = buffer_gets( buffer ) ) != NULL )
+ {
+ }
+
+ buffer_ungets( buffer );
+
+ str1 = buffer_gets( buffer );
+
+ assert( ( strcmp( str1, "HANSEN\n" ) ) == 0 );
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_new_size()
+{
+ fprintf( stderr, " Testing buffer_new_size ... " );
+
+ char *file = "/tmp/test_buffer_new_size";
+ char *str = "X";
+ FILE *fp;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, str );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ buffer_new_size( buffer, 201048577 );
+
+ assert( buffer->size == 268435456 );
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_resize()
+{
+ fprintf( stderr, " Testing buffer_resize ... " );
+
+ char *file = "/tmp/test_buffer_new_size";
+ char *str = "ABC";
+ FILE *fp;
+ char c;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ fprintf( fp, str );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ while ( ( c = buffer_getc( buffer ) ) != EOF )
+ {
+ printf( "C: %c\n", c );
+
+
+ }
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_move()
+{
+ /* Writes a short string to a temporary file, loads it into a file buffer */
+ /* and prints the buffer before and after a buffer_move call. */
+ /* No assertions are made on the result; the printed output is the check. */
+
+ fprintf( stderr, " Testing buffer_move ... " );
+
+ char *file = "/tmp/test_buffer_move";
+ char *str = "ABCDEFG";
+ FILE *fp;
+ file_buffer *buffer;
+
+ fp = write_open( file );
+
+ /* fputs writes str verbatim; it is never interpreted as a format string. */
+ fputs( str, fp );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ buffer_print( buffer );
+
+ /* NOTE(review): 7 bytes starting at index 2 reach past the 7 chars that */
+ /* were written to the file - confirm the intended size argument. */
+ buffer_move( buffer, 7, 2 );
+
+ buffer_print( buffer );
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_destroy()
+{
+ /* Writes a one char temporary file, loads it into a file buffer and */
+ /* destroys the buffer again. The test passes if buffer_destroy returns */
+ /* without tripping its own assertions. */
+
+ fprintf( stderr, " Testing buffer_destroy ... " );
+
+ char *file = "/tmp/test_buffer_destroy";
+ char *str = "X";
+ FILE *fp;
+ file_buffer *buffer = NULL;
+
+ fp = write_open( file );
+
+ /* fputs writes str verbatim; it is never interpreted as a format string. */
+ fputs( str, fp );
+
+ close_stream( fp );
+
+ buffer = buffer_read( file );
+
+ buffer_destroy( &buffer );
+
+ /* buffer_destroy has released the struct buffer points to, so buffer must */
+ /* not be dereferenced from here on; the pointer is only cleared. */
+ buffer = NULL;
+
+ assert( buffer == NULL );
+
+ file_unlink( file );
+
+ fprintf( stderr, "OK\n" );
+}
+
+
+void test_buffer_print()
+{
+ fprintf( stderr, " Testing buffer_print ... " );
+
+ file_buffer *buffer;
+
+ buffer = buffer_read( TEST_FILE );
+
+// buffer_print( buffer );
+
+ buffer_destroy( &buffer );
+
+ buffer = NULL;
+
+ fprintf( stderr, "OK\n" );
+}
+
$test_dir = "test";
@tests = qw(
+ test_common
test_fasta
test_filesys
test_mem