X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ffastq-match.c;h=53af1343c1756f5389dfa2ba18d1b6d9e615fb3a;hb=e25e049490086a631d12bc56785f1a1845877541;hp=ccfca73860501d6b1cf32915ff25632ca7230a46;hpb=54e8755d8c3bf9df0e27aae9ac6ee8976d5943c4;p=fastq-tools.git diff --git a/src/fastq-match.c b/src/fastq-match.c index ccfca73..53af134 100644 --- a/src/fastq-match.c +++ b/src/fastq-match.c @@ -1,21 +1,23 @@ -/* Smith-Waterman alignments against sequences within a fastq file. +/* + * This file is part of fastq-tools. + * + * Copyright (c) 2011 by Daniel C. Jones + * + * fastq-match : + * Smith-Waterman alignments against sequences within a fastq file. * - * Daniel Jones */ + +#include "common.h" +#include "parse.h" +#include "sw.h" #include +#include #include #include -#include "swsse2/blosum62.h" -#include "swsse2/swsse2.h" -#include "swsse2/matrix.h" -#include "swsse2/swstriped.h" -#include "kseq.h" -KSEQ_INIT(gzFile, gzread) - - #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include # include @@ -25,71 +27,42 @@ KSEQ_INIT(gzFile, gzread) #endif -static int help_flag; +static const char* prog_name = "fastq-match"; void print_help() { - fprintf( stderr, + fprintf(stdout, "fastq-match [OPTION]... QUERY [FILE]...\n" "Perform Smith-Waterman local alignment of a query sequence\n" "against each sequence in a fastq file.\n\n" "Options:\n" " -h, --help print this message\n" +" -V, --version output version information and exit\n" ); } -void convert_sequence(unsigned char* s, int n) -{ - int i; - for (i = 0; i < n; i++) { - s[i] = (char)AMINO_ACID_VALUE[(int)s[i]]; - } -} - - -void fastq_match(gzFile fin, FILE* fout, - SwStripedData* sw_data, - unsigned char* query, int n, - SEARCH_OPTIONS* options) +void fastq_match(FILE* fin, FILE* fout, + sw_t* sw, + unsigned char* query, int n) { int score; - int gap_init = -(options->gapInit + options->gapExt); - int gap_ext = -options->gapExt; - int threshold = options->threshold; - - kseq_t* seq; - seq = kseq_init(fin); + fastq_t* fqf = fastq_open(fin); + seq_t* seq = fastq_alloc_seq(); - while (kseq_read(seq) >= 0) { + while (fastq_next(fqf, seq)) { fprintf(fout, "%s\t", seq->seq.s); - convert_sequence((unsigned char*)seq->seq.s, seq->seq.l); - - score = swStripedByte(query, n, - (unsigned char*)seq->seq.s, seq->seq.l, - gap_init, gap_ext, - sw_data->pvbQueryProf, - sw_data->pvH1, - sw_data->pvH2, - sw_data->pvE, - sw_data->bias); - if (score >= 255) { - score = swStripedWord(query, n, - (unsigned char*)seq->seq.s, seq->seq.l, - gap_init, gap_ext, - sw_data->pvbQueryProf, - sw_data->pvH1, - sw_data->pvH2, - sw_data->pvE); - } + fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n); + score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n); fprintf(fout, "%d\n", score); } - kseq_destroy(seq); + fastq_free_seq(seq); + fastq_close(fqf); } @@ -101,33 +74,26 @@ int main(int argc, char* argv[]) unsigned char* query; int query_len; - SwStripedData* sw_data; - signed char* mat = blosum62; - SEARCH_OPTIONS options; - options.gapInit = -10; - options.gapExt = -2; - options.threshold = -1; + sw_t* sw; FILE* fin; - gzFile gzfin; - - help_flag = 0; int opt; int opt_idx; static struct option long_options[] = { - {"help", no_argument, &help_flag, 1}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, {0, 0, 0, 0} }; while (1) { - opt = getopt_long(argc, argv, "h", long_options, &opt_idx); + opt = getopt_long(argc, argv, "hV", long_options, &opt_idx); - if( opt == -1 ) break; + if (opt == -1) break; switch (opt) { case 0: @@ -137,8 +103,12 @@ int main(int argc, char* argv[]) break; case 'h': - help_flag = 1; - break; + print_help(); + return 0; + + case 'V': + print_version(stdout, prog_name); + return 0; case '?': return 1; @@ -148,10 +118,6 @@ int main(int argc, char* argv[]) } } - if (help_flag) { - print_help(); - return 0; - } if (optind >= argc) { fprintf(stderr, "A query sequence must be specified.\n"); @@ -160,20 +126,12 @@ int main(int argc, char* argv[]) query = (unsigned char*)argv[optind++]; query_len = strlen((char*)query); - convert_sequence(query, query_len); + fastq_sw_conv_seq(query, query_len); - sw_data = swStripedInit(query, query_len, mat); + sw = fastq_alloc_sw(query, query_len); if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - gzfin = gzdopen( fileno(stdin), "rb" ); - if (gzfin == NULL) { - fprintf(stderr, "Malformed file 'stdin'.\n"); - return 1; - } - - fastq_match(gzfin, stdout, sw_data, query, query_len, &options); - - gzclose(gzfin); + fastq_match(stdin, stdout, sw, query, query_len); } else { for (; optind < argc; optind++) { @@ -183,19 +141,11 @@ int main(int argc, char* argv[]) continue; } - gzfin = gzdopen(fileno(fin), "rb"); - if (gzfin == NULL) { - fprintf(stderr, "Malformed file '%s'.\n", argv[optind]); - continue; - } - - fastq_match(gzfin, stdout, sw_data, query, query_len, &options); - - gzclose(gzfin); + fastq_match(fin, stdout, sw, query, query_len); } } - swStripedComplete(sw_data); + fastq_free_sw(sw); return 0; }