3 * This file is part of fastq-tools.
5 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
8 * Smith-Waterman alignments against sequences within a fastq file.
13 #include "fastq-common.h"
14 #include "fastq-parse.h"
15 #include "swsse2/blosum62.h"
16 #include "swsse2/swsse2.h"
17 #include "swsse2/matrix.h"
18 #include "swsse2/swstriped.h"
24 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
27 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
29 # define SET_BINARY_MODE(file)
39 "fastq-match [OPTION]... QUERY [FILE]...\n"
40 "Perform Smith-Waterman local alignment of a query sequence\n"
41 "against each sequence in a fastq file.\n\n"
43 " -h, --help print this message\n"
48 void convert_sequence(unsigned char* s, int n)
51 for (i = 0; i < n; i++) {
52 s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
57 void fastq_match(FILE* fin, FILE* fout,
58 SwStripedData* sw_data,
59 unsigned char* query, int n,
60 SEARCH_OPTIONS* options)
64 int gap_init = -(options->gapInit + options->gapExt);
65 int gap_ext = -options->gapExt;
66 int threshold = options->threshold;
68 fastq_t* fqf = fastq_open(fin);
69 seq_t* seq = fastq_alloc_seq();
71 while (fastq_next(fqf, seq)) {
72 fprintf(fout, "%s\t", seq->seq.s);
74 convert_sequence((unsigned char*)seq->seq.s, seq->seq.n);
76 score = swStripedByte(query, n,
77 (unsigned char*)seq->seq.s, seq->seq.n,
79 sw_data->pvbQueryProf,
85 score = swStripedWord(query, n,
86 (unsigned char*)seq->seq.s, seq->seq.n,
88 sw_data->pvbQueryProf,
94 fprintf(fout, "%d\n", score);
103 int main(int argc, char* argv[])
105 SET_BINARY_MODE(stdin);
106 SET_BINARY_MODE(stdout);
108 unsigned char* query;
110 SwStripedData* sw_data;
111 signed char* mat = blosum62;
112 SEARCH_OPTIONS options;
114 options.gapInit = -10;
116 options.threshold = -1;
125 static struct option long_options[] =
127 {"help", no_argument, &help_flag, 1},
133 opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
135 if( opt == -1 ) break;
139 if (long_options[opt_idx].flag != 0) break;
161 if (optind >= argc) {
162 fprintf(stderr, "A query sequence must be specified.\n");
166 query = (unsigned char*)argv[optind++];
167 query_len = strlen((char*)query);
168 convert_sequence(query, query_len);
170 sw_data = swStripedInit(query, query_len, mat);
172 if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
173 fastq_match(stdin, stdout, sw_data, query, query_len, &options);
176 for (; optind < argc; optind++) {
177 fin = fopen(argv[optind], "rb");
179 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
183 fastq_match(fin, stdout, sw_data, query, query_len, &options);
187 swStripedComplete(sw_data);