From 33dcafda31d41c5389d99d0138ebb42a0f5fe5e3 Mon Sep 17 00:00:00 2001
From: martinahansen
Date: Tue, 23 Sep 2008 06:41:33 +0000
Subject: [PATCH] add bipartite_decode

git-svn-id: http://biopieces.googlecode.com/svn/trunk@265 74ccb610-7750-0410-82ae-013aeee3265d
---
 code_c/Maasha/src/bipartite_decode.c | 163 +++++++++++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 code_c/Maasha/src/bipartite_decode.c

diff --git a/code_c/Maasha/src/bipartite_decode.c b/code_c/Maasha/src/bipartite_decode.c
new file mode 100644
index 0000000..0bc706d
--- /dev/null
+++ b/code_c/Maasha/src/bipartite_decode.c
@@ -0,0 +1,163 @@
+/* Martin Asser Hansen (mail@maasha.dk) Copyright (C) 2008 - All rights reserved */
+
+#include "common.h"
+#include "filesys.h"
+
+#define BITS_IN_BYTE 8 /* number of bits in one byte. */
+#define BLOCK_SPACE_MAX 64 /* maximum space between two blocks. */
+#define BLOCK_MASK ( ( BLOCK_SPACE_MAX << 1 ) - 1 ) /* mask for extracting the block space (low 7 bits). */
+
+/* Lookup table for fast conversion of binary blocks back to DNA. */
+/* Entry N holds the four nucleotides encoded by byte value N */
+/* using two bits per nucleotide (A=0, C=1, G=2, T=3) with the */
+/* first nucleotide in the most significant bit pair. */
+char *bin2dna[256] = {
+    "AAAA", "AAAC", "AAAG", "AAAT", "AACA", "AACC", "AACG", "AACT",
+    "AAGA", "AAGC", "AAGG", "AAGT", "AATA", "AATC", "AATG", "AATT",
+    "ACAA", "ACAC", "ACAG", "ACAT", "ACCA", "ACCC", "ACCG", "ACCT",
+    "ACGA", "ACGC", "ACGG", "ACGT", "ACTA", "ACTC", "ACTG", "ACTT",
+    "AGAA", "AGAC", "AGAG", "AGAT", "AGCA", "AGCC", "AGCG", "AGCT",
+    "AGGA", "AGGC", "AGGG", "AGGT", "AGTA", "AGTC", "AGTG", "AGTT",
+    "ATAA", "ATAC", "ATAG", "ATAT", "ATCA", "ATCC", "ATCG", "ATCT",
+    "ATGA", "ATGC", "ATGG", "ATGT", "ATTA", "ATTC", "ATTG", "ATTT",
+    "CAAA", "CAAC", "CAAG", "CAAT", "CACA", "CACC", "CACG", "CACT",
+    "CAGA", "CAGC", "CAGG", "CAGT", "CATA", "CATC", "CATG", "CATT",
+    "CCAA", "CCAC", "CCAG", "CCAT", "CCCA", "CCCC", "CCCG", "CCCT",
+    "CCGA", "CCGC", "CCGG", "CCGT", "CCTA", "CCTC", "CCTG", "CCTT",
+    "CGAA", "CGAC", "CGAG", "CGAT", "CGCA", "CGCC", "CGCG", "CGCT",
+    "CGGA", "CGGC", "CGGG", "CGGT", "CGTA", "CGTC", "CGTG", "CGTT",
+    "CTAA", "CTAC", "CTAG", "CTAT", "CTCA", "CTCC", "CTCG", "CTCT",
+    "CTGA", "CTGC", "CTGG", "CTGT", "CTTA", "CTTC", "CTTG", "CTTT",
+    "GAAA", "GAAC", "GAAG", "GAAT", "GACA", "GACC", "GACG", "GACT",
+    "GAGA", "GAGC", "GAGG", "GAGT", "GATA", "GATC", "GATG", "GATT",
+    "GCAA", "GCAC", "GCAG", "GCAT", "GCCA", "GCCC", "GCCG", "GCCT",
+    "GCGA", "GCGC", "GCGG", "GCGT", "GCTA", "GCTC", "GCTG", "GCTT",
+    "GGAA", "GGAC", "GGAG", "GGAT", "GGCA", "GGCC", "GGCG", "GGCT",
+    "GGGA", "GGGC", "GGGG", "GGGT", "GGTA", "GGTC", "GGTG", "GGTT",
+    "GTAA", "GTAC", "GTAG", "GTAT", "GTCA", "GTCC", "GTCG", "GTCT",
+    "GTGA", "GTGC", "GTGG", "GTGT", "GTTA", "GTTC", "GTTG", "GTTT",
+    "TAAA", "TAAC", "TAAG", "TAAT", "TACA", "TACC", "TACG", "TACT",
+    "TAGA", "TAGC", "TAGG", "TAGT", "TATA", "TATC", "TATG", "TATT",
+    "TCAA", "TCAC", "TCAG", "TCAT", "TCCA", "TCCC", "TCCG", "TCCT",
+    "TCGA", "TCGC", "TCGG", "TCGT", "TCTA", "TCTC", "TCTG", "TCTT",
+    "TGAA", "TGAC", "TGAG", "TGAT", "TGCA", "TGCC", "TGCG", "TGCT",
+    "TGGA", "TGGC", "TGGG", "TGGT", "TGTA", "TGTC", "TGTG", "TGTT",
+    "TTAA", "TTAC", "TTAG", "TTAT", "TTCA", "TTCC", "TTCG", "TTCT",
+    "TTGA", "TTGC", "TTGG", "TTGT", "TTTA", "TTTC", "TTTG", "TTTT"
+};
+
+
+/* Function declarations. */
+void run_decode( int argc, char *argv[] );
+void print_usage( void );
+void motif_print( uint motif, uint count );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MAIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */
+
+
+int main( int argc, char *argv[] )
+{
+    /* Print usage and exit when no files are given, */
+    /* otherwise decode all files given as arguments. */
+
+    if ( argc == 1 ) {
+        print_usage();
+    }
+
+    run_decode( argc, argv );
+
+    return EXIT_SUCCESS;
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FUNCTIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */
+
+
+void print_usage( void )
+{
+    /* Martin A. Hansen, September 2008 */
+
+    /* Print usage to stderr and exit. */
+
+    fprintf( stderr,
+        "Usage: bipartite_decode <file(s)> > result.tab\n"
+    );
+
+    exit( EXIT_SUCCESS );
+}
+
+
+void run_decode( int argc, char *argv[] )
+{
+    /* Martin A. Hansen, September 2008 */
+
+    /* For each file in argv decode the file of */
+    /* bipartite motifs and output the motifs */
+    /* and their count. Each record is a pair of */
+    /* unsigned integers: encoded motif and count. */
+    /* Exits with an error if a file cannot be */
+    /* parsed to the end. */
+
+    FILE *fp = NULL;
+    int i = 0;
+    uint motif = 0;
+    uint count = 0;
+
+    for ( i = 1; i < argc; i++ )
+    {
+        fp = read_open( argv[ i ] );
+
+        /* Require both fields: fscanf returns EOF (which is non-zero) */
+        /* at end of input and 1 for a record with a missing count. */
+        while ( fscanf( fp, "%u\t%u\n", &motif, &count ) == 2 )
+        {
+            motif_print( motif, count );
+        }
+
+        /* Stopping before end-of-file means a read error or a malformed record. */
+        if ( ferror( fp ) || ! feof( fp ) ) {
+            fprintf( stderr, "ERROR: could not parse file: %s\n", argv[ i ] );
+            exit( EXIT_FAILURE );
+        }
+
+        close_stream( fp );
+    }
+}
+
+
+void motif_print( uint motif, uint count )
+{
+    /* Martin A. Hansen, September 2008 */
+
+    /* Converts a binary encoded bipartite motif */
+    /* into DNA and outputs the encoded motif, the */
+    /* two blocks, distance and count separated by tabs: */
+    /* MOTIF \t BLOCK1 \t BLOCK2 \t DIST \t COUNT */
+
+    uchar bin1 = 0;
+    uchar bin2 = 0;
+    ushort dist = 0;
+    uint motif_cpy = motif;
+
+    /* The lowest byte holds the distance between the blocks. */
+    dist = ( ushort ) ( motif & BLOCK_MASK );
+
+    motif >>= sizeof( uchar ) * BITS_IN_BYTE;
+
+    /* The next byte holds the second block. */
+    bin2 = ( uchar ) motif;
+
+    motif >>= sizeof( uchar ) * BITS_IN_BYTE;
+
+    /* The byte after that holds the first block. */
+    bin1 = ( uchar ) motif;
+
+    /* count is unsigned, so it is printed with %u. */
+    printf( "%u\t%s\t%s\t%d\t%u\n", motif_cpy, bin2dna[ bin1 ], bin2dna[ bin2 ], dist, count );
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */
+
+
-- 
2.39.5