]> git.donarmstrong.com Git - fastq-tools.git/blob - src/fastq-match.c
a new and improved parser
[fastq-tools.git] / src / fastq-match.c
1
2 /*
3  * This file is part of fastq-tools.
4  *
5  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
6  *
7  * fastq-match :
8  * Smith-Waterman alignments against sequences within a fastq file.
9  *
10  */
11
12
13 #include "fastq-common.h"
14 #include "fastq-parse.h"
15 #include "swsse2/blosum62.h"
16 #include "swsse2/swsse2.h"
17 #include "swsse2/matrix.h"
18 #include "swsse2/swstriped.h"
19 #include <stdlib.h>
20 #include <string.h>
21 #include <zlib.h>
22 #include <getopt.h>
23
24 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
25 #  include <fcntl.h>
26 #  include <io.h>
27 #  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
28 #else
29 #  define SET_BINARY_MODE(file)
30 #endif
31
32
33 static int help_flag;
34
35
36 void print_help()
37 {
38     fprintf( stderr, 
39 "fastq-match [OPTION]... QUERY [FILE]...\n"
40 "Perform Smith-Waterman local alignment of a query sequence\n"
41 "against each sequence in a fastq file.\n\n"
42 "Options:\n"
43 "  -h, --help              print this message\n"
44     );
45 }
46
47
48 void convert_sequence(unsigned char* s, int n)
49 {
50     int i;
51     for (i = 0; i < n; i++) {
52         s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
53     }
54 }
55
56
57 void fastq_match(FILE* fin, FILE* fout,
58                  SwStripedData* sw_data,
59                  unsigned char* query, int n,
60                  SEARCH_OPTIONS* options)
61 {
62     int score;
63
64     int gap_init  = -(options->gapInit + options->gapExt);
65     int gap_ext   = -options->gapExt;
66     int threshold = options->threshold;
67
68     fastq_t* fqf = fastq_open(fin);
69     seq_t* seq = fastq_alloc_seq();
70
71     while (fastq_next(fqf, seq)) {
72         fprintf(fout, "%s\t", seq->seq.s);
73
74         convert_sequence((unsigned char*)seq->seq.s, seq->seq.n);
75
76         score = swStripedByte(query, n,
77                               (unsigned char*)seq->seq.s, seq->seq.n,
78                               gap_init, gap_ext,
79                               sw_data->pvbQueryProf,
80                               sw_data->pvH1,
81                               sw_data->pvH2,
82                               sw_data->pvE,
83                               sw_data->bias);
84         if (score >= 255) {
85             score = swStripedWord(query, n,
86                                   (unsigned char*)seq->seq.s, seq->seq.n,
87                                   gap_init, gap_ext,
88                                   sw_data->pvbQueryProf,
89                                   sw_data->pvH1,
90                                   sw_data->pvH2,
91                                   sw_data->pvE);
92         }
93
94         fprintf(fout, "%d\n", score);
95     }
96
97     fastq_free_seq(seq);
98     fastq_close(fqf);
99 }
100
101
102
103 int main(int argc, char* argv[])
104 {
105     SET_BINARY_MODE(stdin);
106     SET_BINARY_MODE(stdout);
107
108     unsigned char* query;
109     int query_len;
110     SwStripedData* sw_data;
111     signed char* mat = blosum62;
112     SEARCH_OPTIONS options;
113
114     options.gapInit   = -10;
115     options.gapExt    = -2;
116     options.threshold = -1;
117
118     FILE*  fin;
119
120     help_flag = 0;
121
122     int opt;
123     int opt_idx;
124
125     static struct option long_options[] =
126         { 
127           {"help", no_argument, &help_flag, 1},
128           {0, 0, 0, 0}
129         };
130
131
132     while (1) {
133         opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
134
135         if( opt == -1 ) break;
136
137         switch (opt) {
138             case 0:
139                 if (long_options[opt_idx].flag != 0) break;
140                 if (optarg) {
141                 }
142                 break;
143
144             case 'h':
145                 help_flag = 1;
146                 break;
147
148             case '?':
149                 return 1;
150
151             default:
152                 abort();
153         }
154     }
155
156     if (help_flag) {
157         print_help();
158         return 0;
159     }
160
161     if (optind >= argc) {
162         fprintf(stderr, "A query sequence must be specified.\n");
163         return 1;
164     }
165
166     query = (unsigned char*)argv[optind++];
167     query_len = strlen((char*)query);
168     convert_sequence(query, query_len);
169
170     sw_data = swStripedInit(query, query_len, mat);
171
172     if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
173         fastq_match(stdin, stdout, sw_data, query, query_len, &options);
174     }
175     else {
176         for (; optind < argc; optind++) {
177             fin = fopen(argv[optind], "rb");
178             if (fin == NULL) {
179                 fprintf(stderr, "No such file '%s'.\n", argv[optind]);
180                 continue;
181             }
182
183             fastq_match(fin, stdout, sw_data, query, query_len, &options);
184         }
185     }
186
187     swStripedComplete(sw_data);
188
189     return 0;
190 }
191
192
193