]> git.donarmstrong.com Git - fastq-tools.git/blobdiff - src/fastq-match.c
reimplemented smith-waterman
[fastq-tools.git] / src / fastq-match.c
index ccfca73860501d6b1cf32915ff25632ca7230a46..7b55f07c50b53f78947e37f121eb6a1f97b855f5 100644 (file)
@@ -1,21 +1,23 @@
 
-/* Smith-Waterman alignments against sequences within a fastq file.
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-match :
+ * Smith-Waterman alignments against sequences within a fastq file.
  *
- * Daniel Jones <dcjones@cs.washington.edu>
  */
 
+
+#include "fastq-common.h"
+#include "fastq-parse.h"
+#include "fastq-sw.h"
 #include <stdlib.h>
+#include <string.h>
 #include <zlib.h>
 #include <getopt.h>
 
-#include "swsse2/blosum62.h"
-#include "swsse2/swsse2.h"
-#include "swsse2/matrix.h"
-#include "swsse2/swstriped.h"
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
-
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
 #  include <fcntl.h>
 #  include <io.h>
@@ -40,56 +42,28 @@ void print_help()
 }
 
 
-void convert_sequence(unsigned char* s, int n)
-{
-    int i;
-    for (i = 0; i < n; i++) {
-        s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
-    }
-}
 
 
-void fastq_match(gzFile fin, FILE* fout,
-                 SwStripedData* sw_data,
-                 unsigned char* query, int n,
-                 SEARCH_OPTIONS* options)
+void fastq_match(FILE* fin, FILE* fout,
+                 sw_t* sw,
+                 unsigned char* query, int n)
 {
     int score;
 
-    int gap_init  = -(options->gapInit + options->gapExt);
-    int gap_ext   = -options->gapExt;
-    int threshold = options->threshold;
-
-    kseq_t* seq;
-    seq = kseq_init(fin);
+    fastq_t* fqf = fastq_open(fin);
+    seq_t* seq = fastq_alloc_seq();
 
-    while (kseq_read(seq) >= 0) {
+    while (fastq_next(fqf, seq)) {
         fprintf(fout, "%s\t", seq->seq.s);
 
-        convert_sequence((unsigned char*)seq->seq.s, seq->seq.l);
-
-        score = swStripedByte(query, n,
-                              (unsigned char*)seq->seq.s, seq->seq.l,
-                              gap_init, gap_ext,
-                              sw_data->pvbQueryProf,
-                              sw_data->pvH1,
-                              sw_data->pvH2,
-                              sw_data->pvE,
-                              sw_data->bias);
-        if (score >= 255) {
-            score = swStripedWord(query, n,
-                                  (unsigned char*)seq->seq.s, seq->seq.l,
-                                  gap_init, gap_ext,
-                                  sw_data->pvbQueryProf,
-                                  sw_data->pvH1,
-                                  sw_data->pvH2,
-                                  sw_data->pvE);
-        }
+        fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n);
+        score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n);
 
         fprintf(fout, "%d\n", score);
     }
 
-    kseq_destroy(seq);
+    fastq_free_seq(seq);
+    fastq_close(fqf);
 }
 
 
@@ -101,16 +75,10 @@ int main(int argc, char* argv[])
 
     unsigned char* query;
     int query_len;
-    SwStripedData* sw_data;
-    signed char* mat = blosum62;
-    SEARCH_OPTIONS options;
 
-    options.gapInit   = -10;
-    options.gapExt    = -2;
-    options.threshold = -1;
+    sw_t* sw;
 
     FILE*  fin;
-    gzFile gzfin;
 
     help_flag = 0;
 
@@ -119,7 +87,9 @@ int main(int argc, char* argv[])
 
     static struct option long_options[] =
         { 
-          {"help", no_argument, &help_flag, 1},
+          {"help",       no_argument, &help_flag, 1},
+          {"gap-init",   required_argument, NULL, 0},
+          {"gap-extend", required_argument, NULL, 0},
           {0, 0, 0, 0}
         };
 
@@ -160,20 +130,12 @@ int main(int argc, char* argv[])
 
     query = (unsigned char*)argv[optind++];
     query_len = strlen((char*)query);
-    convert_sequence(query, query_len);
+    fastq_sw_conv_seq(query, query_len);
 
-    sw_data = swStripedInit(query, query_len, mat);
+    sw = fastq_alloc_sw(query, query_len);
 
     if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
-        gzfin = gzdopen( fileno(stdin), "rb" );
-        if (gzfin == NULL) {
-            fprintf(stderr, "Malformed file 'stdin'.\n");
-            return 1;
-        }
-
-        fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
-        gzclose(gzfin);
+        fastq_match(stdin, stdout, sw, query, query_len);
     }
     else {
         for (; optind < argc; optind++) {
@@ -183,19 +145,11 @@ int main(int argc, char* argv[])
                 continue;
             }
 
-            gzfin = gzdopen(fileno(fin), "rb");
-            if (gzfin == NULL) {
-                fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
-                continue;
-            }
-
-            fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
-            gzclose(gzfin);
+            fastq_match(fin, stdout, sw, query, query_len);
         }
     }
 
-    swStripedComplete(sw_data);
+    fastq_free_sw(sw);
 
     return 0;
 }