2 /* Smith-Waterman alignments against sequences within a fastq file.
4 * Daniel Jones <dcjones@cs.washington.edu>
11 #include "swsse2/blosum62.h"
12 #include "swsse2/swsse2.h"
13 #include "swsse2/matrix.h"
14 #include "swsse2/swstriped.h"
16 KSEQ_INIT(gzFile, gzread)
19 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
22 # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
24 # define SET_BINARY_MODE(file)
34 "fastq-match [OPTION]... QUERY [FILE]...\n"
35 "Perform Smith-Waterman local alignment of a query sequence\n"
36 "against each sequence in a fastq file.\n\n"
38 " -h, --help print this message\n"
43 void convert_sequence(unsigned char* s, int n)
46 for (i = 0; i < n; i++) {
47 s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
52 void fastq_match(gzFile fin, FILE* fout,
53 SwStripedData* sw_data,
54 unsigned char* query, int n,
55 SEARCH_OPTIONS* options)
59 int gap_init = -(options->gapInit + options->gapExt);
60 int gap_ext = -options->gapExt;
61 int threshold = options->threshold;
66 while (kseq_read(seq) >= 0) {
67 fprintf(fout, "%s\t", seq->seq.s);
69 convert_sequence((unsigned char*)seq->seq.s, seq->seq.l);
71 score = swStripedByte(query, n,
72 (unsigned char*)seq->seq.s, seq->seq.l,
74 sw_data->pvbQueryProf,
80 score = swStripedWord(query, n,
81 (unsigned char*)seq->seq.s, seq->seq.l,
83 sw_data->pvbQueryProf,
89 fprintf(fout, "%d\n", score);
97 int main(int argc, char* argv[])
99 SET_BINARY_MODE(stdin);
100 SET_BINARY_MODE(stdout);
102 unsigned char* query;
104 SwStripedData* sw_data;
105 signed char* mat = blosum62;
106 SEARCH_OPTIONS options;
108 options.gapInit = -10;
110 options.threshold = -1;
120 static struct option long_options[] =
122 {"help", no_argument, &help_flag, 1},
128 opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
130 if( opt == -1 ) break;
134 if (long_options[opt_idx].flag != 0) break;
156 if (optind >= argc) {
157 fprintf(stderr, "A query sequence must be specified.\n");
161 query = (unsigned char*)argv[optind++];
162 query_len = strlen((char*)query);
163 convert_sequence(query, query_len);
165 sw_data = swStripedInit(query, query_len, mat);
167 if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
168 gzfin = gzdopen( fileno(stdin), "rb" );
170 fprintf(stderr, "Malformed file 'stdin'.\n");
174 fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
179 for (; optind < argc; optind++) {
180 fin = fopen(argv[optind], "rb");
182 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
186 gzfin = gzdopen(fileno(fin), "rb");
188 fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
192 fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
198 swStripedComplete(sw_data);