X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ffastq-match.c;h=7b55f07c50b53f78947e37f121eb6a1f97b855f5;hb=7295fe6abb7eb2f5f25f3c82e9cbd3c389ae9370;hp=ccfca73860501d6b1cf32915ff25632ca7230a46;hpb=54e8755d8c3bf9df0e27aae9ac6ee8976d5943c4;p=fastq-tools.git diff --git a/src/fastq-match.c b/src/fastq-match.c index ccfca73..7b55f07 100644 --- a/src/fastq-match.c +++ b/src/fastq-match.c @@ -1,21 +1,23 @@ -/* Smith-Waterman alignments against sequences within a fastq file. +/* + * This file is part of fastq-tools. + * + * Copyright (c) 2011 by Daniel C. Jones + * + * fastq-match : + * Smith-Waterman alignments against sequences within a fastq file. * - * Daniel Jones */ + +#include "fastq-common.h" +#include "fastq-parse.h" +#include "fastq-sw.h" #include +#include #include #include -#include "swsse2/blosum62.h" -#include "swsse2/swsse2.h" -#include "swsse2/matrix.h" -#include "swsse2/swstriped.h" -#include "kseq.h" -KSEQ_INIT(gzFile, gzread) - - #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include # include @@ -40,56 +42,28 @@ void print_help() } -void convert_sequence(unsigned char* s, int n) -{ - int i; - for (i = 0; i < n; i++) { - s[i] = (char)AMINO_ACID_VALUE[(int)s[i]]; - } -} -void fastq_match(gzFile fin, FILE* fout, - SwStripedData* sw_data, - unsigned char* query, int n, - SEARCH_OPTIONS* options) +void fastq_match(FILE* fin, FILE* fout, + sw_t* sw, + unsigned char* query, int n) { int score; - int gap_init = -(options->gapInit + options->gapExt); - int gap_ext = -options->gapExt; - int threshold = options->threshold; - - kseq_t* seq; - seq = kseq_init(fin); + fastq_t* fqf = fastq_open(fin); + seq_t* seq = fastq_alloc_seq(); - while (kseq_read(seq) >= 0) { + while (fastq_next(fqf, seq)) { fprintf(fout, "%s\t", seq->seq.s); - convert_sequence((unsigned char*)seq->seq.s, seq->seq.l); - - score = swStripedByte(query, n, - (unsigned char*)seq->seq.s, seq->seq.l, - gap_init, gap_ext, - sw_data->pvbQueryProf, - sw_data->pvH1, - sw_data->pvH2, - sw_data->pvE, - sw_data->bias); - if (score >= 255) { - score = swStripedWord(query, n, - (unsigned char*)seq->seq.s, seq->seq.l, - gap_init, gap_ext, - sw_data->pvbQueryProf, - sw_data->pvH1, - sw_data->pvH2, - sw_data->pvE); - } + fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n); + score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n); fprintf(fout, "%d\n", score); } - kseq_destroy(seq); + fastq_free_seq(seq); + fastq_close(fqf); } @@ -101,16 +75,10 @@ int main(int argc, char* argv[]) unsigned char* query; int query_len; - SwStripedData* sw_data; - signed char* mat = blosum62; - SEARCH_OPTIONS options; - options.gapInit = -10; - options.gapExt = -2; - options.threshold = -1; + sw_t* sw; FILE* fin; - gzFile gzfin; help_flag = 0; @@ -119,7 +87,9 @@ int main(int argc, char* argv[]) static struct option long_options[] = { - {"help", no_argument, &help_flag, 1}, + {"help", no_argument, &help_flag, 1}, + {"gap-init", required_argument, NULL, 0}, + {"gap-extend", required_argument, NULL, 0}, {0, 0, 0, 0} }; @@ -160,20 +130,12 @@ int main(int argc, char* argv[]) query = (unsigned char*)argv[optind++]; query_len = strlen((char*)query); - convert_sequence(query, query_len); + fastq_sw_conv_seq(query, query_len); - sw_data = swStripedInit(query, query_len, mat); + sw = fastq_alloc_sw(query, query_len); if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - gzfin = gzdopen( fileno(stdin), "rb" ); - if (gzfin == NULL) { - fprintf(stderr, "Malformed file 'stdin'.\n"); - return 1; - } - - fastq_match(gzfin, stdout, sw_data, query, query_len, &options); - - gzclose(gzfin); + fastq_match(stdin, stdout, sw, query, query_len); } else { for (; optind < argc; optind++) { @@ -183,19 +145,11 @@ int main(int argc, char* argv[]) continue; } - gzfin = gzdopen(fileno(fin), "rb"); - if (gzfin == NULL) { - fprintf(stderr, "Malformed file '%s'.\n", argv[optind]); - continue; - } - - fastq_match(gzfin, stdout, sw_data, query, query_len, &options); - - gzclose(gzfin); + fastq_match(fin, stdout, sw, query, query_len); } } - swStripedComplete(sw_data); + fastq_free_sw(sw); return 0; }