*/
#include "common.h"
+#include "mem.h"
#include "filesys.h"
+#include "seq.h"
#include "fasta.h"
// #define OLIGO_SIZE 15
array = oligo_count( path );
- oligo_count_output( path, array );
+ //oligo_count_output( path, array );
return 0;
}
/* Count the occurrence of all oligos of a fixed size in a FASTA file. */
- uint *array = NULL;
- uint i = 0;
- uint mask = 0;
- uint bin = 0;
- uint bin_rc1 = 0;
- uint bin_rc2 = 0;
- uint j = 0;
- uint A_rc = ( 3 << ( UINT_BITS - 2 ) ); /* 11 on the leftmost two bits an uint. */
- uint G_rc = ( 2 << ( UINT_BITS - 2 ) ); /* 10 on the leftmost two bits an uint. */
- uint C_rc = ( 1 << ( UINT_BITS - 2 ) ); /* 01 on the leftmost two bits an uint. */
- struct seq_entry *entry = NULL;
- FILE *fp = NULL;
+ uint *array = NULL;
+ uint i = 0;
+ uint mask = 0;
+ uint bin = 0;
+ uint bin_rc1 = 0;
+ uint bin_rc2 = 0;
+ uint j = 0;
+ uint A_rc = ( 3 << ( UINT_BITS - 2 ) ); /* 11 on the leftmost two bits an uint. */
+ uint G_rc = ( 2 << ( UINT_BITS - 2 ) ); /* 10 on the leftmost two bits an uint. */
+ uint C_rc = ( 1 << ( UINT_BITS - 2 ) ); /* 01 on the leftmost two bits an uint. */
+ seq_entry *entry = NULL;
+ FILE *fp = NULL;
array = mem_get_zero( sizeof( uint ) * SIZE );
mask = mask_create( OLIGO_SIZE );
- MEM_GET( entry );
+ entry = mem_get( sizeof( entry ) );
fp = read_open( path );
- while ( ( fasta_get_entry( fp, entry ) ) )
+ while ( ( fasta_get_entry( fp, &entry ) ) )
{
fprintf( stderr, "Counting oligos in: %s ... ", entry->seq_name );
close_stream( fp );
- fasta_free_entry( entry );
+ free( entry->seq_name );
+ free( entry->seq );
+ entry = NULL;
return array;
}
}
-void oligo_count_output( char *path, uint *array )
-{
- /* Martin A. Hansen, June 2008 */
-
- /* Output oligo count for each sequence position. */
-
- struct seq_entry *entry;
- FILE *fp;
- uint mask;
- uint i;
- uint j;
- uint bin;
- int count;
- uint *block;
- uint block_pos;
- uint block_beg;
- uint chr_pos;
-
- mask = mask_create( OLIGO_SIZE );
-
- MEM_GET( entry );
-
- fp = read_open( path );
-
- while ( ( fasta_get_entry( fp, entry ) ) )
- {
- fprintf( stderr, "Writing results for: %s ... ", entry->seq_name );
-
- bin = 0;
- j = 0;
- block_pos = 0;
- block = mem_get_zero( sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE ) );
-
- for ( i = 0; entry->seq[ i ]; i++ )
- {
- bin <<= 2;
-
- switch( entry->seq[ i ] )
- {
- case 'A': case 'a': j++; break;
- case 'T': case 't': bin |= T; j++; break;
- case 'C': case 'c': bin |= C; j++; break;
- case 'G': case 'g': bin |= G; j++; break;
- default: bin = 0; j = 0; break;
- }
-
- if ( j >= OLIGO_SIZE )
- {
- count = array[ ( bin & mask ) ];
-
- if ( count > 1 )
- {
- chr_pos = i - OLIGO_SIZE + 1;
-
- if ( block_pos == 0 )
- {
- ZERO( block );
-
- block_beg = chr_pos;
-
- block[ block_pos ] = count;
-
- block_pos++;
- }
- else
- {
- if ( chr_pos > block_beg + block_pos )
- {
- fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
-
- block_pos = 0;
- }
- else
- {
- block[ block_pos ] = count;
-
- block_pos++;
- }
- }
- }
- }
- }
-
- if ( block_pos > 0 )
- {
- fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
-
- mem_free( block );
- }
-
- fprintf( stderr, "done.\n" );
- }
-
- close_stream( fp );
-
- fasta_free_entry( entry );
-}
+//void oligo_count_output( char *path, uint *array )
+//{
+// /* Martin A. Hansen, June 2008 */
+//
+// /* Output oligo count for each sequence position. */
+//
+// struct seq_entry *entry;
+// FILE *fp;
+// uint mask;
+// uint i;
+// uint j;
+// uint bin;
+// int count;
+// uint *block;
+// uint block_pos;
+// uint block_beg;
+// uint block_size;
+// uint chr_pos;
+// file_buffer *buffer;
+//
+// mask = mask_create( OLIGO_SIZE );
+//
+// entry = mem_get( sizeof( entry ) );
+//
+// fp = read_open( path );
+//
+// while ( ( fasta_get_entry( buffer, &entry ) ) )
+// {
+// fprintf( stderr, "Writing results for: %s ... ", entry->seq_name );
+//
+// bin = 0;
+// j = 0;
+// block_pos = 0;
+// block_size = sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE );
+// block = mem_get_zero( block_size );
+//
+// for ( i = 0; entry->seq[ i ]; i++ )
+// {
+// bin <<= 2;
+//
+// switch( entry->seq[ i ] )
+// {
+// case 'A': case 'a': j++; break;
+// case 'T': case 't': bin |= T; j++; break;
+// case 'C': case 'c': bin |= C; j++; break;
+// case 'G': case 'g': bin |= G; j++; break;
+// default: bin = 0; j = 0; break;
+// }
+//
+// if ( j >= OLIGO_SIZE )
+// {
+// count = array[ ( bin & mask ) ];
+//
+// if ( count > 1 )
+// {
+// chr_pos = i - OLIGO_SIZE + 1;
+//
+// if ( block_pos == 0 )
+// {
+// memset( block, '\0', block_size );
+//
+// block_beg = chr_pos;
+//
+// block[ block_pos ] = count;
+//
+// block_pos++;
+// }
+// else
+// {
+// if ( chr_pos > block_beg + block_pos )
+// {
+// fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
+//
+// block_pos = 0;
+// }
+// else
+// {
+// block[ block_pos ] = count;
+//
+// block_pos++;
+// }
+// }
+// }
+// }
+// }
+//
+// if ( block_pos > 0 )
+// {
+// fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
+//
+// mem_free( ( void * ) &block );
+// }
+//
+// fprintf( stderr, "done.\n" );
+// }
+//
+// close_stream( fp );
+//
+// fasta_free_entry( entry );
+//}
void fixedstep_put_entry( char *chr, int beg, int step_size, uint *block_array, int block_size )