-/* Smith-Waterman alignments against sequences within a fastq file.
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-match :
+ * Smith-Waterman alignments against sequences within a fastq file.
*
- * Daniel Jones <dcjones@cs.washington.edu>
*/
+
+#include "fastq-common.h"
+#include "fastq-parse.h"
+#include "fastq-sw.h"
#include <stdlib.h>
+#include <string.h>
#include <zlib.h>
#include <getopt.h>
-#include "swsse2/blosum62.h"
-#include "swsse2/swsse2.h"
-#include "swsse2/matrix.h"
-#include "swsse2/swstriped.h"
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
-
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
}
-void convert_sequence(unsigned char* s, int n)
-{
- int i;
- for (i = 0; i < n; i++) {
- s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
- }
-}
-void fastq_match(gzFile fin, FILE* fout,
- SwStripedData* sw_data,
- unsigned char* query, int n,
- SEARCH_OPTIONS* options)
+void fastq_match(FILE* fin, FILE* fout,
+ sw_t* sw,
+ unsigned char* query, int n)
{
int score;
- int gap_init = -(options->gapInit + options->gapExt);
- int gap_ext = -options->gapExt;
- int threshold = options->threshold;
-
- kseq_t* seq;
- seq = kseq_init(fin);
+ fastq_t* fqf = fastq_open(fin);
+ seq_t* seq = fastq_alloc_seq();
- while (kseq_read(seq) >= 0) {
+ while (fastq_next(fqf, seq)) {
fprintf(fout, "%s\t", seq->seq.s);
- convert_sequence((unsigned char*)seq->seq.s, seq->seq.l);
-
- score = swStripedByte(query, n,
- (unsigned char*)seq->seq.s, seq->seq.l,
- gap_init, gap_ext,
- sw_data->pvbQueryProf,
- sw_data->pvH1,
- sw_data->pvH2,
- sw_data->pvE,
- sw_data->bias);
- if (score >= 255) {
- score = swStripedWord(query, n,
- (unsigned char*)seq->seq.s, seq->seq.l,
- gap_init, gap_ext,
- sw_data->pvbQueryProf,
- sw_data->pvH1,
- sw_data->pvH2,
- sw_data->pvE);
- }
+ fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n);
+ score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n);
fprintf(fout, "%d\n", score);
}
- kseq_destroy(seq);
+ fastq_free_seq(seq);
+ fastq_close(fqf);
}
unsigned char* query;
int query_len;
- SwStripedData* sw_data;
- signed char* mat = blosum62;
- SEARCH_OPTIONS options;
- options.gapInit = -10;
- options.gapExt = -2;
- options.threshold = -1;
+ sw_t* sw;
FILE* fin;
- gzFile gzfin;
help_flag = 0;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
+ {"help", no_argument, &help_flag, 1},
+ {"gap-init", required_argument, NULL, 0},
+ {"gap-extend", required_argument, NULL, 0},
{0, 0, 0, 0}
};
query = (unsigned char*)argv[optind++];
query_len = strlen((char*)query);
- convert_sequence(query, query_len);
+ fastq_sw_conv_seq(query, query_len);
- sw_data = swStripedInit(query, query_len, mat);
+ sw = fastq_alloc_sw(query, query_len);
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- gzfin = gzdopen( fileno(stdin), "rb" );
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file 'stdin'.\n");
- return 1;
- }
-
- fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
- gzclose(gzfin);
+ fastq_match(stdin, stdout, sw, query, query_len);
}
else {
for (; optind < argc; optind++) {
continue;
}
- gzfin = gzdopen(fileno(fin), "rb");
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
- continue;
- }
-
- fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
- gzclose(gzfin);
+ fastq_match(fin, stdout, sw, query, query_len);
}
}
- swStripedComplete(sw_data);
+ fastq_free_sw(sw);
return 0;
}