/* Martin Asser Hansen (mail@maasha.dk) Copyright (C) 2008 - All rights reserved */

#include <stdio.h>

#include "common.h"
#include "mem.h"
#include "filesys.h"
#include "list.h"
#include "strings.h"
#include "ucsc.h"

+bed_entry *bed_entry_new( const int cols )
{
- /* Martin A. Hansen, June 2008 */
+ bed_entry *entry = mem_get( sizeof( bed_entry ) );
- /* Get next 3 column bed entry from stream. */
+ entry->cols = cols;
+ entry->chr = mem_get( BED_CHR_MAX );
+ entry->chr_beg = 0;
+ entry->chr_end = 0;
- char bed_buffer[ BED_BUFFER ];
- struct bed_entry12 *bed12 = NULL;
+ if ( cols == 3 ) {
+ return entry;
+ }
- bed12 = mem_get( sizeof( bed12 ) );
+ entry->q_id = mem_get( BED_QID_MAX );
- if ( ( fgets( bed_buffer, sizeof( bed_buffer ), fp ) != NULL ) )
- {
- printf( "buffer: %s\n", bed_buffer );
-
- bed_split( bed_buffer, bed12, 3 );
+ if ( cols == 4 ) {
+ return entry;
+ }
+
+ entry->score = 0;
+
+ if ( cols == 5 ) {
+ return entry;
+ }
+
+ entry->strand = 0;
- return;
+ if ( cols == 6 ) {
+ return entry;
}
-// return NULL;
+ entry->thick_beg = 0;
+ entry->thick_end = 0;
+ entry->itemrgb = mem_get( BED_ITEMRGB_MAX );
+ entry->blockcount = 0;;
+ entry->blocksizes = mem_get( BED_BLOCKSIZES_MAX );
+ entry->q_begs = mem_get( BED_QBEGS_MAX );
+
+ return entry;
}
-void bed_split( char *string, struct bed_entry12 *bed, int cols )
+bed_entry *bed_entry_get( FILE *fp, int cols )
{
- int i;
- int field_num;
- int offset;
- char *new_line;
- int new_line_pos;
- char *pt;
- int pos;
- int field_seps[ cols ];
- int field_len;
- char *field;
-
- if ( ( new_line = memchr( string, '\n', 1024 ) ) != NULL ) {
- new_line_pos = new_line - string;
- } else {
- die( "bed_split: no newline found." );
- }
+ bed_entry *entry = bed_entry_new( cols );
+ char buffer[ BED_BUFFER ];
- field_num = 0;
- offset = 0;
+ assert( cols == 0 || cols == 3 || cols == 4 || cols == 5 || cols == 6 || cols == 12 );
- for ( i = 0; i < cols; i++ )
+ if ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
{
- if ( ( pt = memchr( &string[ offset ], '\t', new_line_pos - offset ) ) != NULL )
+ if ( ! cols )
+ {
+ cols = 1 + strchr_total( buffer, '\t' );
+ entry->cols = cols;
+ }
+
+ if ( cols == 3 )
{
- pos = pt - string;
+ sscanf(
+ buffer,
+ "%s\t%u\t%u",
+ entry->chr,
+ &entry->chr_beg,
+ &entry->chr_end
+ );
- pos = MIN( pos, new_line_pos );
+ return entry;
+ }
- field_seps[ field_num ] = pos;
+ if ( cols == 4 )
+ {
+ sscanf(
+ buffer,
+ "%s\t%u\t%u\t%s",
+ entry->chr,
+ &entry->chr_beg,
+ &entry->chr_end,
+ entry->q_id
+ );
+
+ return entry;
+ }
+
+ if ( cols == 5 )
+ {
+ sscanf(
+ buffer,
+ "%s\t%u\t%u\t%s\t%i",
+ entry->chr,
+ &entry->chr_beg,
+ &entry->chr_end,
+ entry->q_id,
+ &entry->score
+ );
- field_num++;
+ return entry;
}
- else
+
+ if ( cols == 6 )
{
- die( "bed_split: no tab found." );
+ sscanf(
+ buffer,
+ "%s\t%u\t%u\t%s\t%i\t%c",
+ entry->chr,
+ &entry->chr_beg,
+ &entry->chr_end,
+ entry->q_id,
+ &entry->score,
+ &entry->strand
+ );
+
+ return entry;
}
- offset += pos + 1;
+ if ( cols == 12 )
+ {
+ sscanf(
+ buffer,
+ "%s\t%u\t%u\t%s\t%i\t%c\t%u\t%u\t%s\t%u\t%s\t%s",
+ entry->chr,
+ &entry->chr_beg,
+ &entry->chr_end,
+ entry->q_id,
+ &entry->score,
+ &entry->strand,
+ &entry->thick_beg,
+ &entry->thick_end,
+ entry->itemrgb,
+ &entry->blockcount,
+ entry->blocksizes,
+ entry->q_begs
+ );
+
+ return entry;
+ }
}
- offset = 0;
+ return NULL;
+}
+
+
+list_sl *bed_entries_get( char *path, const int cols )
+{
+ list_sl *list = list_sl_new();
+ node_sl *node = node_sl_new();
+ node_sl *old_node = NULL;
+ bed_entry *entry = NULL;
+ FILE *fp = NULL;
+
+ fp = read_open( path );
- for ( i = 0; i < cols; i++ )
+ if ( ( entry = bed_entry_get( fp, cols ) ) != NULL )
{
- field_len = field_seps[ i ] - offset;
-
- field = mem_get( field_len );
-
- field[ field_len ] = '\0';
-
- memcpy( field, &string[ offset ], field_len );
-
- if ( i == 0 ) {
- bed->chr = mem_clone( ( char * ) field, field_len );
- } else if ( i == 1 ) {
- bed->chr_beg = strtod( field, &pt );
- } else if ( i == 2 ) {
- bed->chr_end = strtod( field, &pt );
- } else if ( i == 3 ) {
- bed->q_id = mem_clone( ( char * ) field, field_len );
- } else if ( i == 4 ) {
- bed->score = strtof( field, &pt );
- } else if ( i == 5 ) {
- bed->strand = field[ 0 ];
- } else if ( i == 6 ) {
- bed->thick_beg = strtod( field, &pt );
- } else if ( i == 7 ) {
- bed->thick_end = strtod( field, &pt );
- } else if ( i == 8 ) {
- bed->itemrgb = mem_clone( ( char * ) field, field_len );
- } else if ( i == 9 ) {
- bed->blockcount = strtod( field, &pt );
- } else if ( i == 10 ) {
- bed->blocksizes = mem_clone( ( char * ) field, field_len );
- } else if ( i == 11 ) {
- bed->q_begs = mem_clone( ( char * ) field, field_len );
- }
+ node->val = entry;
+
+ list_sl_add_beg( &list, &node );
- if ( pt == NULL ) {
- die( "bed parse failed." );
- }
+ old_node = node;
+ }
+
+ while ( ( entry = bed_entry_get( fp, cols ) ) != NULL )
+ {
+ node = node_sl_new();
+
+ node->val = entry;
+
+ list_sl_add_after( &old_node, &node );
+
+ old_node = node;
+ }
+
+ close_stream( fp );
+
+ return list;
+}
- offset = field_seps[ i ] + 1;
+
+void bed_entry_put( bed_entry *entry, int cols )
+{
+ if ( ! cols ) {
+ cols = entry->cols;
+ }
+
+ if ( cols == 3 )
+ {
+ printf(
+ "%s\t%u\t%u\n",
+ entry->chr,
+ entry->chr_beg,
+ entry->chr_end
+ );
+ }
+ else if ( cols == 4 )
+ {
+ printf(
+ "%s\t%u\t%u\t%s\n",
+ entry->chr,
+ entry->chr_beg,
+ entry->chr_end,
+ entry->q_id
+ );
+ }
+ else if ( cols == 5 )
+ {
+ printf(
+ "%s\t%u\t%u\t%s\t%i\n",
+ entry->chr,
+ entry->chr_beg,
+ entry->chr_end,
+ entry->q_id,
+ entry->score
+ );
+ }
+ else if ( cols == 6 )
+ {
+ printf(
+ "%s\t%u\t%u\t%s\t%i\t%c\n",
+ entry->chr,
+ entry->chr_beg,
+ entry->chr_end,
+ entry->q_id,
+ entry->score,
+ entry->strand
+ );
+ }
+ else if ( cols == 12 )
+ {
+ printf(
+ "%s\t%u\t%u\t%s\t%i\t%c\t%u\t%u\t%s\t%u\t%s\t%s\n",
+ entry->chr,
+ entry->chr_beg,
+ entry->chr_end,
+ entry->q_id,
+ entry->score,
+ entry->strand,
+ entry->thick_beg,
+ entry->thick_end,
+ entry->itemrgb,
+ entry->blockcount,
+ entry->blocksizes,
+ entry->q_begs
+ );
+ }
+ else
+ {
+ fprintf( stderr, "ERROR: Wrong number of columns in bed_entry_put: %d\n", cols );
+
+ abort();
+ }
+}
+
+
+void bed_entries_put( list_sl *entries, int cols )
+{
+ node_sl *node = NULL;
+
+ for ( node = entries->first; node != NULL; node = node->next ) {
+ bed_entry_put( ( bed_entry * ) node->val, cols );
+ }
+}
+
+
+int cmp_bed_sort_beg( const void *a, const void *b )
+{
+ node_sl *a_node = *( ( node_sl ** ) a );
+ node_sl *b_node = *( ( node_sl ** ) b );
+
+ bed_entry *a_entry = ( bed_entry * ) a_node->val;
+ bed_entry *b_entry = ( bed_entry * ) b_node->val;
+
+ if ( a_entry->chr_beg < b_entry->chr_beg ) {
+ return -1;
+ } else if ( a_entry->chr_beg > b_entry->chr_beg ) {
+ return 1;
+ } else {
+ return 0;
}
+}
+
+
+int cmp_bed_sort_chr_beg( const void *a, const void *b )
+{
+ node_sl *a_node = *( ( node_sl ** ) a );
+ node_sl *b_node = *( ( node_sl ** ) b );
+
+ bed_entry *a_entry = ( bed_entry * ) a_node->val;
+ bed_entry *b_entry = ( bed_entry * ) b_node->val;
+
+ int diff = 0;
+
+ diff = strcmp( a_entry->chr, b_entry->chr );
- printf( "chr ->%s\n", bed->chr );
- printf( "chr_beg->%u\n", bed->chr_beg );
- printf( "chr_end->%u\n", bed->chr_end );
+ if ( diff < 0 ) {
+ return -1;
+ } else if ( diff > 0 ) {
+ return 1;
+ } else if ( a_entry->chr_beg < b_entry->chr_beg ) {
+ return -1;
+ } else if ( a_entry->chr_beg > b_entry->chr_beg ) {
+ return 1;
+ } else {
+ return 0;
+ }
}
+int cmp_bed_sort_chr_strand_beg( const void *a, const void *b )
+{
+ node_sl *a_node = *( ( node_sl ** ) a );
+ node_sl *b_node = *( ( node_sl ** ) b );
+
+ bed_entry *a_entry = ( bed_entry * ) a_node->val;
+ bed_entry *b_entry = ( bed_entry * ) b_node->val;
+
+ int diff = 0;
+
+ diff = strcmp( a_entry->chr, b_entry->chr );
+
+ if ( diff < 0 ) {
+ return -1;
+ } else if ( diff > 0 ) {
+ return 1;
+ } else if ( a_entry->strand < b_entry->strand ) {
+ return -1;
+ } else if ( a_entry->strand > b_entry->strand ) {
+ return 1;
+ } else if ( a_entry->chr_beg < b_entry->chr_beg ) {
+ return -1;
+ } else if ( a_entry->chr_beg > b_entry->chr_beg ) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+