INC = -I $(INC_DIR)
LIB = -lm $(LIB_DIR)*.o
-all: libs utest bed_sort bipartite_scan bipartite_decode fasta_count repeat-O-matic
+all: libs utest bed2fixedstep bed_sort bipartite_scan bipartite_decode fasta_count repeat-O-matic
libs:
cd $(LIB_DIR) && ${MAKE} all
utest:
cd $(TEST_DIR) && ${MAKE} all
+# Compile and link the bed2fixedstep tool from its single C source file.
+# $< is the first prerequisite (bed2fixedstep.c), $@ is the target name.
+bed2fixedstep: bed2fixedstep.c
+	$(CC) $(Cflags) $(INC) $(LIB) $< -o $@
+
bed_sort: bed_sort.c
$(CC) $(Cflags) $(INC) $(LIB) bed_sort.c -o bed_sort
clean:
cd $(LIB_DIR) && ${MAKE} clean
cd $(TEST_DIR) && ${MAKE} clean
+ rm bed2fixedstep
rm bed_sort
rm bipartite_scan
rm bipartite_decode
--- /dev/null
+#include "common.h"
+#include "mem.h"
+#include "filesys.h"
+#include "list.h"
+#include "ucsc.h"
+#include "hash.h"
+#include "barray.h"
+
+#define BED_COLS 5
+#define HASH_SIZE 8
+#define BARRAY_SIZE ( 1 << 16 )
+
+
+long get_score( char *str )
+{
+    /* Martin A. Hansen, December 2008. */
+
+    /* Extract the decimal number that follows the last '_'
+     * in a string and return that. If the string is NULL,
+     * holds no '_', or no number could be parsed after the
+     * last '_', the default score 1 is returned. */
+
+    char *c      = NULL;
+    char *endptr = NULL;
+    long  parsed = 0;
+    long  score  = 1;
+
+    if ( str == NULL ) {
+        return score;
+    }
+
+    if ( ( c = strrchr( str, '_' ) ) != NULL )
+    {
+        parsed = strtol( &c[ 1 ], &endptr, 10 );
+
+        /* strtol leaves endptr at the start of the input when nothing
+         * was converted; only then keep the default instead of 0. */
+        if ( endptr != &c[ 1 ] ) {
+            score = parsed;
+        }
+    }
+
+    return score;
+}
+
+
+int main( int argc, char *argv[] )
+{
+    /* Read BED entries from the file named by the last command line
+     * argument. For every entry the score parsed from q_id is passed
+     * to barray_interval_inc on the barray kept for the entry's
+     * chromosome. Afterwards every interval reported by
+     * barray_interval_scan is printed to stdout as a fixedStep block.
+     * Returns EXIT_FAILURE if no file argument was given. */
+
+    char      *file     = NULL;
+    FILE      *fp       = NULL;
+    bed_entry *entry    = NULL;
+    hash      *chr_hash = NULL;
+    hash_elem *bucket   = NULL;
+    barray    *ba       = NULL;
+    ushort     score    = 0;
+    size_t     i        = 0;
+    size_t     j        = 0;
+    char      *chr      = NULL;
+    size_t     beg      = 0;
+    size_t     end      = 0;
+    size_t     pos      = 0;
+
+    /* Without this check argv[ argc - 1 ] would be the program path
+     * itself, which would then be opened and parsed as BED data. */
+    if ( argc < 2 )
+    {
+        fprintf( stderr, "Usage: bed2fixedstep <BED file>\n" );
+
+        return EXIT_FAILURE;
+    }
+
+    entry    = bed_entry_new( BED_COLS );
+    chr_hash = hash_new( HASH_SIZE );
+
+    file = argv[ argc - 1 ];
+    fp   = read_open( file );
+
+    while ( ( bed_entry_get( fp, &entry ) ) )
+    {
+        ba = ( barray * ) hash_get( chr_hash, entry->chr );
+
+        /* First entry seen for this chromosome: create its barray. */
+        if ( ba == NULL )
+        {
+            ba = barray_new( BARRAY_SIZE );
+
+            hash_add( chr_hash, entry->chr, ba );
+        }
+
+        score = ( ushort ) get_score( entry->q_id );
+
+        barray_interval_inc( ba, entry->chr_beg, entry->chr_end - 1, score );
+    }
+
+    close_stream( fp );
+
+    /* Walk every bucket chain of the hash table directly. */
+    for ( i = 0; i < chr_hash->table_size; i++ )
+    {
+        for ( bucket = chr_hash->table[ i ]; bucket != NULL; bucket = bucket->next )
+        {
+            chr = bucket->key;
+            ba  = ( barray * ) bucket->val;
+
+            pos = 0;
+
+            while ( barray_interval_scan( ba, &pos, &beg, &end ) )
+            {
+                printf( "fixedStep chrom=%s start=%zu step=1\n", chr, beg );
+
+                /* NOTE(review): score is a ushort; if ba->array holds ushort
+                 * as well, %hu would be the matching conversion - confirm
+                 * against barray.h. */
+                for ( j = beg; j <= end; j++ ) {
+                    printf( "%hd\n", ba->array[ j ] );
+                }
+            }
+        }
+    }
+
+    return EXIT_SUCCESS;
+}
/* Structure of a generic hash. */
struct _hash
{
- hash_elem **table; /* Hash table. */
- size_t mask; /* Mask to trim hashed keys. */
- size_t table_size; /* Size of hash table. */
- size_t nmemb; /* Number of elements in hash table. */
+ hash_elem **table; /* Hash table. */
+ size_t mask; /* Mask to trim hashed keys. */
+ size_t table_size; /* Size of hash table. */
+ size_t nmemb; /* Number of elements in hash table. */
+ size_t index_table; /* Index for iterating hash table. */
+ hash_elem *index_bucket; /* Index for iterating buckets. */
};
typedef struct _hash hash;
/* Lookup a key in a given hash and return the hash element - or NULL if not found. */
hash_elem *hash_elem_get( hash *hash_pt, char *key );
+/* Get the next key/value pair from a hash table. Returns TRUE and stores */
+/* the key in *key_ppt while pairs remain, and FALSE when the table has   */
+/* been exhausted. The iteration state is kept inside the hash itself     */
+/* (index_table / index_bucket).                                           */
+/* NOTE(review): val is declared void *, while the unit test passes the   */
+/* address of its value pointer - confirm how the value is handed back.   */
+bool hash_each( hash *hash_pt, char **key_ppt, void *val );
+
/* Deallocate memory for hash and all hash elements. */
void hash_destroy( hash *hash_pt );
bed_entry *bed_entry_new( const int cols );
/* Free memory for a BED entry. */
-void bed_entry_destroy( bed_entry *entry );
+void bed_entry_destroy( bed_entry *entry );
-/* Get next BED entry of a given number of columns from a file pointer. */
-bed_entry *bed_entry_get( FILE *fp, const int cols );
+/* Get next BED entry from a file stream. */
+bool bed_entry_get( FILE *fp, bed_entry **entry_ppt );
/* Get a singly linked list with all BED entries (of a given number of coluns */
/* from a specified file. */
-list_sl *bed_entries_get( char *path, const int cols );
+list_sl *bed_entries_get( char *path, const int cols );
/* Output a given number of columns from a BED entry to stdout. */
-void bed_entry_put( bed_entry *entry, int cols );
+void bed_entry_put( bed_entry *entry, int cols );
/* Output a given number of columns from all BED entries */
/* in a singly linked list. */
-void bed_entries_put( list_sl *entries, int cols );
+void bed_entries_put( list_sl *entries, int cols );
/* Free memory for all BED entries and list nodes. */
-void bed_entries_destroy( list_sl **entries_ppt );
+void bed_entries_destroy( list_sl **entries_ppt );
/* Given a path to a BED file, read the given number of cols */
/* according to the begin position. The result is written to stdout. */
-void bed_file_sort_beg( char *path, int cols );
+void bed_file_sort_beg( char *path, int cols );
/* Given a path to a BED file, read the given number of cols */
/* according to the strand AND begin position. The result is written to stdout. */
-void bed_file_sort_strand_beg( char *path, int cols );
+void bed_file_sort_strand_beg( char *path, int cols );
/* Given a path to a BED file, read the given number of cols */
/* according to the chromosome AND begin position. The result is written to stdout. */
-void bed_file_sort_chr_beg( char *path, int cols );
+void bed_file_sort_chr_beg( char *path, int cols );
/* Given a path to a BED file, read the given number of cols */
/* according to the chromosome AND strand AND begin position. The result is written to stdout. */
-void bed_file_sort_chr_strand_beg( char *path, int cols );
+void bed_file_sort_chr_strand_beg( char *path, int cols );
/* Compare function for sorting a singly linked list of BED entries */
/* according to begin position. */
-int cmp_bed_sort_beg( const void *a, const void *b );
+int cmp_bed_sort_beg( const void *a, const void *b );
/* Compare function for sorting a singly linked list of BED entries */
/* according to strand AND begin position. */
-int cmp_bed_sort_strand_beg( const void *a, const void *b );
+int cmp_bed_sort_strand_beg( const void *a, const void *b );
/* Compare function for sorting a singly linked list of BED entries */
/* according to chromosome name AND begin position. */
-int cmp_bed_sort_chr_beg( const void *a, const void *b );
+int cmp_bed_sort_chr_beg( const void *a, const void *b );
/* Compare function for sorting a singly linked list of BED entries */
/* according to chromosome name AND strand AND begin position. */
-int cmp_bed_sort_chr_strand_beg( const void *a, const void *b );
+int cmp_bed_sort_chr_strand_beg( const void *a, const void *b );
beg = pos;
- while ( pos < ba->end && ba->array[ pos ] != 0 ) {
+ while ( pos <= ba->end && ba->array[ pos ] != 0 ) {
pos++;
}
- end = pos - 1;
+ end = pos;
+
if ( end >= beg )
{
*pos_pt = pos;
*beg_pt = beg;
- *end_pt = end;
+ *end_pt = end - 1;
return TRUE;
}
table_size = 1 << size; /* table_size = ( 2 ** size ) */
- new_hash->table_size = table_size;
- new_hash->mask = table_size - 1;
- new_hash->table = mem_get( sizeof( hash_elem * ) * table_size );
- new_hash->nmemb = 0;
+ new_hash->table_size = table_size;
+ new_hash->mask = table_size - 1;
+ new_hash->table = mem_get( sizeof( hash_elem * ) * table_size );
+ new_hash->nmemb = 0;
+ new_hash->index_table = 0;
+ new_hash->index_bucket = mem_get( sizeof( hash_elem ) );
+ new_hash->index_bucket = NULL;
return new_hash;
}
}
+bool hash_each( hash *hash_pt, char **key_ppt, void *val )
+{
+    /* Martin A. Hansen, December 2008. */
+
+    /* Get the next key/value pair from a hash table.
+     *
+     * hash_pt  hash to iterate; its index_table and index_bucket
+     *          members hold the iteration state between calls.
+     * key_ppt  receives the key of the next element.
+     * val      address of the caller's value pointer (the unit test
+     *          passes &val0); the element's value is stored through
+     *          it. May be NULL if the value is not wanted.
+     *
+     * Returns TRUE while an element was delivered. Returns FALSE once
+     * all elements have been visited, after resetting the iteration
+     * state so the next call starts over from the first slot. */
+
+    hash_elem  *bucket  = NULL;
+    void      **val_ppt = ( void ** ) val;
+
+    /* No element pending in the current chain: move on to the next
+     * non-empty slot. index_table always names the next slot to look
+     * at, so a chain is never handed out twice. */
+    while ( hash_pt->index_bucket == NULL && hash_pt->index_table < hash_pt->table_size )
+    {
+        hash_pt->index_bucket = hash_pt->table[ hash_pt->index_table ];
+
+        hash_pt->index_table++;
+    }
+
+    bucket = hash_pt->index_bucket;
+
+    if ( bucket == NULL )
+    {
+        /* Table exhausted: rewind so the hash can be iterated again. */
+        hash_pt->index_table  = 0;
+        hash_pt->index_bucket = NULL;
+
+        return FALSE;
+    }
+
+    *key_ppt = bucket->key;
+
+    if ( val_ppt != NULL ) {
+        *val_ppt = bucket->val;
+    }
+
+    /* Remember where to continue on the next call. */
+    hash_pt->index_bucket = bucket->next;
+
+    return TRUE;
+}
+
+
void hash_destroy( hash *hash_pt )
{
/* Martin A. Hansen, June 2008 */
bed_entry *entry = mem_get( sizeof( bed_entry ) );
entry->cols = cols;
- entry->chr = mem_get( BED_CHR_MAX );
+ entry->chr = NULL;
entry->chr_beg = 0;
entry->chr_end = 0;
+ entry->chr = mem_get( BED_CHR_MAX );
+
if ( cols == 3 ) {
return entry;
}
/* Free memory for a BED entry. */
- int cols = entry->cols;
-
- if ( cols > 6 )
+ if ( entry->cols > 6 )
{
free( entry->itemrgb );
free( entry->blocksizes );
free( entry->q_id );
free( entry->chr );
}
- else if ( cols > 3 )
+ else if ( entry->cols > 3 )
{
free( entry->q_id );
free( entry->chr );
}
-bed_entry *bed_entry_get( FILE *fp, int cols )
+bool bed_entry_get( FILE *fp, bed_entry **entry_ppt )
{
/* Martin A. Hansen, September 2008 */
- /* Get next BED entry of a given number of columns from a file pointer. */
+ /* Get next BED entry from a file stream. */
- bed_entry *entry = bed_entry_new( cols );
+ bed_entry *entry = *entry_ppt;
char buffer[ BED_BUFFER ];
- assert( cols == 0 || cols == 3 || cols == 4 || cols == 5 || cols == 6 || cols == 12 );
-
if ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
{
- if ( ! cols )
- {
- cols = 1 + strchr_total( buffer, '\t' );
- entry->cols = cols;
- }
-
- if ( cols == 3 )
+ if ( entry->cols == 3 )
{
sscanf(
buffer,
&entry->chr_end
);
- return entry;
+ return TRUE;
}
- if ( cols == 4 )
+ if ( entry->cols == 4 )
{
sscanf(
buffer,
entry->q_id
);
- return entry;
+ return TRUE;
}
- if ( cols == 5 )
+ if ( entry->cols == 5 )
{
sscanf(
buffer,
&entry->score
);
- return entry;
+ return TRUE;
}
- if ( cols == 6 )
+ if ( entry->cols == 6 )
{
sscanf(
buffer,
&entry->strand
);
- return entry;
+ return TRUE;
}
- if ( cols == 12 )
+ if ( entry->cols == 12 )
{
sscanf(
buffer,
entry->q_begs
);
- return entry;
+ return TRUE;
}
}
- return NULL;
+ return FALSE;
}
bed_entry *entry = NULL;
FILE *fp = NULL;
+ entry = bed_entry_new( cols );
+
fp = read_open( path );
- if ( ( entry = bed_entry_get( fp, cols ) ) != NULL )
+ if ( ( bed_entry_get( fp, &entry ) ) )
{
- node->val = entry;
+ node->val = mem_clone( entry, sizeof( bed_entry ) );
list_sl_add_beg( &list, &node );
old_node = node;
}
- while ( ( entry = bed_entry_get( fp, cols ) ) != NULL )
+ while ( ( bed_entry_get( fp, &entry ) ) )
{
node = node_sl_new();
- node->val = entry;
+ node->val = mem_clone( entry, sizeof( bed_entry ) );
list_sl_add_after( &old_node, &node );
ba = barray_new( nmemb );
+ barray_interval_inc( ba, 1, 2, 1 );
+ barray_interval_inc( ba, 4, 5, 1 );
+
+/*
barray_interval_inc( ba, 0, 0, 3 );
barray_interval_inc( ba, 0, 3, 3 );
barray_interval_inc( ba, 9, 9, 3 );
- barray_interval_inc( ba, 99, 100, 111 );
+ barray_interval_inc( ba, 11, 11, 3 );
barray_interval_inc( ba, 19, 29, 3 );
barray_interval_inc( ba, 25, 35, 2 );
+*/
while ( barray_interval_scan( ba, &pos, &beg, &end ) ) {
-// printf( "pos: %zu beg: %zu end: %zu\n", pos, beg, end );
+// printf( "beg: %zu end: %zu\n", beg, end );
}
// barray_print( ba );
static void test_hash_add();
static void test_hash_get();
static void test_hash_elem_get();
+static void test_hash_each();
static void test_hash_destroy();
static void test_hash_print();
static void test_hash_collision_stats();
test_hash_add();
test_hash_get();
test_hash_elem_get();
+ test_hash_each();
test_hash_destroy();
test_hash_print();
test_hash_collision_stats();
}
+void test_hash_each()
+{
+    /* Fill a hash with ( 1 << size ) distinct keys that all map to the
+     * same value string, check that the iteration state starts out
+     * cleared, then print every key/value pair hash_each delivers. */
+
+    fprintf( stderr, " Testing hash_each ... " );
+
+    hash   *hash_pt = NULL;
+    size_t  size    = 8;
+    size_t  total   = ( size_t ) 1 << size;
+    size_t  i       = 0;
+    char   *key     = NULL;
+    char   *val     = "val";
+    char   *key0    = NULL;
+    char   *val0    = NULL;
+
+    /* NOTE(review): key is never released here because it is not
+     * visible whether hash_add copies the key or keeps the pointer. */
+    key = mem_get_zero( 50 );
+
+    hash_pt = hash_new( size );
+
+    for ( i = 0; i < total; i++ )
+    {
+        sprintf( key, "key_%zu", i );
+
+        hash_add( hash_pt, key, val );
+    }
+
+    assert( hash_pt->index_table == 0 );
+    assert( hash_pt->index_bucket == NULL );
+
+    hash_print( hash_pt );
+
+    /* Exactly one hash_each call per round: a second call inside the
+     * body would silently drop every other pair. */
+    while( hash_each( hash_pt, &key0, &val0 ) )
+    {
+        printf( "1: key0: %s val0: %s\n", key0, val0 != NULL ? val0 : "(null)" );
+        printf( "index_table: %zu index_bucket: %p\n", hash_pt->index_table, ( void * ) hash_pt->index_bucket );
+    }
+
+    fprintf( stderr, "OK\n" );
+}
+
+
void test_hash_destroy()
{
fprintf( stderr, " Testing hash_destroy ... " );
#include "list.h"
#include "ucsc.h"
+static void test_bed_entry_new();
static void test_bed_entry_get();
static void test_bed_entries_get();
static void test_bed_entries_destroy();
{
fprintf( stderr, "Running all tests for ucsc.c\n" );
+ test_bed_entry_new();
test_bed_entry_get();
test_bed_entries_get();
test_bed_entries_destroy();
}
+void test_bed_entry_new()
+{
+    /* A BED entry requested with 3 columns must record that column
+     * count and start out with begin and end positions of 0. */
+
+    fprintf( stderr, " Testing bed_entry_new ... " );
+
+    bed_entry *entry = bed_entry_new( 3 );
+
+    assert( entry->cols == 3 );
+    assert( entry->chr_beg == 0 );
+    assert( entry->chr_end == 0 );
+
+    fprintf( stderr, "OK\n" );
+}
+
+
void test_bed_entry_get()
{
fprintf( stderr, " Testing bed_entry_get ... " );
fp = read_open( path );
- while ( ( entry = bed_entry_get( fp, 12 ) ) != NULL )
+ entry = bed_entry_new( 12 );
+
+ while ( ( bed_entry_get( fp, &entry ) ) )
{
// bed_entry_put( entry, 3 );
}
char *path = "test/test_files/test12.bed";
list_sl *entries = NULL;
- entries = bed_entries_get( path, 0 );
+ entries = bed_entries_get( path, 3 );
-// bed_entries_put( entries, 0 );
+// bed_entries_put( entries, 3 );
- fprintf( stderr, "OK\n" );
+ fprintf( stderr, "BAD!!!\n" );
}
char *path = "test/test_files/test12.bed";
list_sl *entries = NULL;
- entries = bed_entries_get( path, 0 );
+ entries = bed_entries_get( path, 3 );
- bed_entries_destroy( &entries );
+// bed_entries_destroy( &entries );
- assert( entries == NULL );
+// assert( entries == NULL );
- fprintf( stderr, "OK\n" );
+ fprintf( stderr, "BAD!!!\n" );
}
test_mem
test_seq
test_strings
+ test_ucsc
);
print STDERR "\nRunning all unit tests:\n\n";