From 3984b1d9f4ae9050906792051304c33f91400d20 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 11 Sep 2008 11:50:50 +0000 Subject: [PATCH] bed_sort created git-svn-id: http://biopieces.googlecode.com/svn/trunk@256 74ccb610-7750-0410-82ae-013aeee3265d --- code_c/Maasha/src/Makefile | 6 +- code_c/Maasha/src/bed_sort.c | 22 ++++++ code_c/Maasha/src/inc/ucsc.h | 12 ++-- code_c/Maasha/src/lib/ucsc.c | 74 +++++++++++++++++--- code_c/Maasha/src/test/test_files/test12.bed | 16 ++--- code_c/Maasha/src/test/test_ucsc.c | 3 +- 6 files changed, 109 insertions(+), 24 deletions(-) create mode 100644 code_c/Maasha/src/bed_sort.c diff --git a/code_c/Maasha/src/Makefile b/code_c/Maasha/src/Makefile index 527b6ea..4312b1f 100644 --- a/code_c/Maasha/src/Makefile +++ b/code_c/Maasha/src/Makefile @@ -11,7 +11,7 @@ TEST_DIR = test/ INC = -I $(INC_DIR) LIB = -lm $(LIB_DIR)*.o -all: libs utest bipartite_scan fasta_count repeat-O-matic +all: libs utest bed_sort bipartite_scan fasta_count repeat-O-matic libs: cd $(LIB_DIR) && ${MAKE} all @@ -19,6 +19,9 @@ libs: utest: cd $(TEST_DIR) && ${MAKE} all +bed_sort: bed_sort.c + $(CC) $(Cflags) $(INC) $(LIB) bed_sort.c -o bed_sort + bipartite_scan: bipartite_scan.c $(CC) $(Cflags) $(INC) $(LIB) bipartite_scan.c -o bipartite_scan @@ -31,6 +34,7 @@ repeat-O-matic: repeat-O-matic.c clean: cd $(LIB_DIR) && ${MAKE} clean cd $(TEST_DIR) && ${MAKE} clean + rm bed_sort rm bipartite_scan rm fasta_count rm repeat-O-matic diff --git a/code_c/Maasha/src/bed_sort.c b/code_c/Maasha/src/bed_sort.c new file mode 100644 index 0000000..d8e84eb --- /dev/null +++ b/code_c/Maasha/src/bed_sort.c @@ -0,0 +1,22 @@ +/* Martin Asser Hansen (mail@maasha.dk) Copyright (C) 2008 - All right reserved */ + +#include "common.h" +#include "list.h" +#include "ucsc.h" + + +int main( int argc, char *argv[] ) +{ + char *file = NULL; + list_sl *entries = NULL; + + file = argv[ 1 ]; + + entries = bed_entries_get( file, 3 ); + + list_sl_sort( &entries, cmp_bed_sort_chr_beg ); + + bed_entries_put( entries, 3 ); + + return EXIT_SUCCESS; +} diff --git a/code_c/Maasha/src/inc/ucsc.h b/code_c/Maasha/src/inc/ucsc.h index 0ca029d..6e655c5 100644 --- a/code_c/Maasha/src/inc/ucsc.h +++ b/code_c/Maasha/src/inc/ucsc.h @@ -1,11 +1,11 @@ /* Martin Asser Hansen (mail@maasha.dk) Copyright (C) 2008 - All right reserved */ #define BED_BUFFER 2048 -#define BED_CHR_MAX 64 +#define BED_CHR_MAX 16 #define BED_QID_MAX 256 #define BED_ITEMRGB_MAX 16 -#define BED_BLOCKSIZES_MAX 512 -#define BED_QBEGS_MAX 512 +#define BED_BLOCKSIZES_MAX 256 +#define BED_QBEGS_MAX 256 struct _bed_entry { @@ -14,7 +14,7 @@ struct _bed_entry uint chr_beg; uint chr_end; char *q_id; - uint score; + int score; char strand; uint thick_beg; uint thick_end; @@ -31,4 +31,6 @@ bed_entry *bed_entry_get( FILE *fp, const int cols ); list_sl *bed_entries_get( char *path, const int cols ); void bed_entry_put( bed_entry *entry, int cols ); void bed_entries_put( list_sl *entries, int cols ); -int cmp_bed3_entries_sort( const void *a, const void *b ); +int cmp_bed_sort_beg( const void *a, const void *b ); +int cmp_bed_sort_chr_beg( const void *a, const void *b ); +int cmp_bed_sort_chr_strand_beg( const void *a, const void *b ); diff --git a/code_c/Maasha/src/lib/ucsc.c b/code_c/Maasha/src/lib/ucsc.c index 6fab258..bdb722a 100644 --- a/code_c/Maasha/src/lib/ucsc.c +++ b/code_c/Maasha/src/lib/ucsc.c @@ -97,7 +97,7 @@ bed_entry *bed_entry_get( FILE *fp, int cols ) { sscanf( buffer, - "%s\t%u\t%u\t%s\t%u", + "%s\t%u\t%u\t%s\t%i", entry->chr, &entry->chr_beg, &entry->chr_end, @@ -112,7 +112,7 @@ bed_entry *bed_entry_get( FILE *fp, int cols ) { sscanf( buffer, - "%s\t%u\t%u\t%s\t%u\t%c", + "%s\t%u\t%u\t%s\t%i\t%c", entry->chr, &entry->chr_beg, &entry->chr_end, @@ -128,7 +128,7 @@ bed_entry *bed_entry_get( FILE *fp, int cols ) { sscanf( buffer, - "%s\t%u\t%u\t%s\t%u\t%c\t%u\t%u\t%s\t%u\t%s\t%s", + "%s\t%u\t%u\t%s\t%i\t%c\t%u\t%u\t%s\t%u\t%s\t%s", entry->chr, &entry->chr_beg, &entry->chr_end, @@ -215,7 +215,7 @@ void bed_entry_put( bed_entry *entry, int cols ) else if ( cols == 5 ) { printf( - "%s\t%u\t%u\t%s\t%u\n", + "%s\t%u\t%u\t%s\t%i\n", entry->chr, entry->chr_beg, entry->chr_end, @@ -226,7 +226,7 @@ void bed_entry_put( bed_entry *entry, int cols ) else if ( cols == 6 ) { printf( - "%s\t%u\t%u\t%s\t%u\t%c\n", + "%s\t%u\t%u\t%s\t%i\t%c\n", entry->chr, entry->chr_beg, entry->chr_end, @@ -238,7 +238,7 @@ void bed_entry_put( bed_entry *entry, int cols ) else if ( cols == 12 ) { printf( - "%s\t%u\t%u\t%s\t%u\t%c\t%u\t%u\t%s\t%u\t%s\t%s\n", + "%s\t%u\t%u\t%s\t%i\t%c\t%u\t%u\t%s\t%u\t%s\t%s\n", entry->chr, entry->chr_beg, entry->chr_end, @@ -272,7 +272,7 @@ void bed_entries_put( list_sl *entries, int cols ) } -int cmp_bed3_entries_sort( const void *a, const void *b ) +int cmp_bed_sort_beg( const void *a, const void *b ) { node_sl *a_node = *( ( node_sl ** ) a ); node_sl *b_node = *( ( node_sl ** ) b ); @@ -280,10 +280,66 @@ int cmp_bed3_entries_sort( const void *a, const void *b ) bed_entry *a_entry = ( bed_entry * ) a_node->val; bed_entry *b_entry = ( bed_entry * ) b_node->val; - if ( a_entry->chr_end < b_entry->chr_end ) { + if ( a_entry->chr_beg < b_entry->chr_beg ) { + return -1; + } else if ( a_entry->chr_beg > b_entry->chr_beg ) { + return 1; + } else { + return 0; + } +} + + +int cmp_bed_sort_chr_beg( const void *a, const void *b ) +{ + node_sl *a_node = *( ( node_sl ** ) a ); + node_sl *b_node = *( ( node_sl ** ) b ); + + bed_entry *a_entry = ( bed_entry * ) a_node->val; + bed_entry *b_entry = ( bed_entry * ) b_node->val; + + int diff = 0; + + diff = strcmp( a_entry->chr, b_entry->chr ); + + if ( diff < 0 ) { + return -1; + } else if ( diff > 0 ) { + return 1; + } else if ( a_entry->chr_beg < b_entry->chr_beg ) { + return -1; + } else if ( a_entry->chr_beg > b_entry->chr_beg ) { return 1; - } else if ( a_entry->chr_end > b_entry->chr_end ) { + } else { + return 0; + } +} + + +int cmp_bed_sort_chr_strand_beg( const void *a, const void *b ) +{ + node_sl *a_node = *( ( node_sl ** ) a ); + node_sl *b_node = *( ( node_sl ** ) b ); + + bed_entry *a_entry = ( bed_entry * ) a_node->val; + bed_entry *b_entry = ( bed_entry * ) b_node->val; + + int diff = 0; + + diff = strcmp( a_entry->chr, b_entry->chr ); + + if ( diff < 0 ) { return -1; + } else if ( diff > 0 ) { + return 1; + } else if ( a_entry->strand < b_entry->strand ) { + return -1; + } else if ( a_entry->strand > b_entry->strand ) { + return 1; + } else if ( a_entry->chr_beg < b_entry->chr_beg ) { + return -1; + } else if ( a_entry->chr_beg > b_entry->chr_beg ) { + return 1; } else { return 0; } diff --git a/code_c/Maasha/src/test/test_files/test12.bed b/code_c/Maasha/src/test/test_files/test12.bed index 7e60b26..79f75f7 100644 --- a/code_c/Maasha/src/test/test_files/test12.bed +++ b/code_c/Maasha/src/test/test_files/test12.bed @@ -1,10 +1,10 @@ -chr4 31176 31602 AA695812 0 - 31176 31602 0 1 426, 0, -chr4 44448 44874 AA695812 0 - 44448 44874 0 1 426, 0, -chr4 50522 50841 AA142091 0 - 50522 50841 0 2 81,237, 0,82, -chr4 57489 57808 AA142091 0 - 57489 57808 0 2 81,237, 0,82, -chr4 59352 59778 AA695812 0 - 59352 59778 0 1 426, 0, -chr4 63580 64332 AA979544 0 - 63580 64332 0 1 752, 0, -chr4 63710 64332 AA979534 0 - 63710 64332 0 3 111,481,30, 0,111,592, -chr4 70946 71196 AA699063 0 - 70946 71196 0 2 142,55, 0,195, +chr14 31176 31602 AA695812 0 - 31176 31602 0 1 426, 0, +chr4 70946 71196 AA699063 0 + 70946 71196 0 2 142,55, 0,195, chr4 72831 76893 AA264101 0 - 72831 76893 0 2 179,437, 0,3625, chr4 72872 76630 AA694817 0 - 72872 76630 0 3 83,54,174, 0,84,3584, +chr4 50522 50841 AA142091 0 - 50522 50841 0 2 81,237, 0,82, +chr14 44448 44874 AA695812 0 - 44448 44874 0 1 426, 0, +chr14 57489 57808 AA142091 0 + 57489 57808 0 2 81,237, 0,82, +chr4 59352 59778 AA695812 0 - 59352 59778 0 1 426, 0, +chr4 63580 64332 AA979544 0 + 63580 64332 0 1 752, 0, +chr24 63710 64332 AA979534 0 - 63710 64332 0 3 111,481,30, 0,111,592, diff --git a/code_c/Maasha/src/test/test_ucsc.c b/code_c/Maasha/src/test/test_ucsc.c index 2ef6220..4ef1cd5 100644 --- a/code_c/Maasha/src/test/test_ucsc.c +++ b/code_c/Maasha/src/test/test_ucsc.c @@ -67,7 +67,8 @@ void test_bed_entries_sort() entries = bed_entries_get( path, 0 ); - list_sl_sort( &entries, cmp_bed3_entries_sort ); + list_sl_sort( &entries, cmp_bed_sort_chr_beg ); + list_sl_sort( &entries, cmp_bed_sort_chr_strand_beg ); bed_entries_put( entries, 0 ); -- 2.39.2