-/* Smith-Waterman alignments against sequences within a fastq file.
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-match :
+ * Smith-Waterman alignments against sequences within a fastq file.
*
- * Daniel Jones <dcjones@cs.washington.edu>
*/
+
+#include "common.h"
+#include "parse.h"
+#include "sw.h"
#include <stdlib.h>
+#include <string.h>
#include <zlib.h>
#include <getopt.h>
-#include "swsse2/blosum62.h"
-#include "swsse2/swsse2.h"
-#include "swsse2/matrix.h"
-#include "swsse2/swstriped.h"
-#include "kseq.h"
-KSEQ_INIT(gzFile, gzread)
-
-
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
#endif
-static int help_flag;
+static const char* prog_name = "fastq-match";
void print_help()
{
- fprintf( stderr,
+ fprintf(stdout,
"fastq-match [OPTION]... QUERY [FILE]...\n"
"Perform Smith-Waterman local alignment of a query sequence\n"
"against each sequence in a fastq file.\n\n"
"Options:\n"
" -h, --help print this message\n"
+" -V, --version output version information and exit\n"
);
}
-void convert_sequence(unsigned char* s, int n)
-{
- int i;
- for (i = 0; i < n; i++) {
- s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
- }
-}
-
-
-void fastq_match(gzFile fin, FILE* fout,
- SwStripedData* sw_data,
- unsigned char* query, int n,
- SEARCH_OPTIONS* options)
+void fastq_match(FILE* fin, FILE* fout, sw_t* sw)
{
int score;
- int gap_init = -(options->gapInit + options->gapExt);
- int gap_ext = -options->gapExt;
- int threshold = options->threshold;
-
- kseq_t* seq;
- seq = kseq_init(fin);
+ fastq_t* fqf = fastq_create(fin);
+ seq_t* seq = seq_create();
- while (kseq_read(seq) >= 0) {
+ while (fastq_read(fqf, seq)) {
fprintf(fout, "%s\t", seq->seq.s);
- convert_sequence((unsigned char*)seq->seq.s, seq->seq.l);
-
- score = swStripedByte(query, n,
- (unsigned char*)seq->seq.s, seq->seq.l,
- gap_init, gap_ext,
- sw_data->pvbQueryProf,
- sw_data->pvH1,
- sw_data->pvH2,
- sw_data->pvE,
- sw_data->bias);
- if (score >= 255) {
- score = swStripedWord(query, n,
- (unsigned char*)seq->seq.s, seq->seq.l,
- gap_init, gap_ext,
- sw_data->pvbQueryProf,
- sw_data->pvH1,
- sw_data->pvH2,
- sw_data->pvE);
- }
+ fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n);
+ score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n);
fprintf(fout, "%d\n", score);
}
- kseq_destroy(seq);
+ seq_free(seq);
+ fastq_free(fqf);
}
unsigned char* query;
int query_len;
- SwStripedData* sw_data;
- signed char* mat = blosum62;
- SEARCH_OPTIONS options;
- options.gapInit = -10;
- options.gapExt = -2;
- options.threshold = -1;
+ sw_t* sw;
FILE* fin;
- gzFile gzfin;
-
- help_flag = 0;
int opt;
int opt_idx;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "hV", long_options, &opt_idx);
- if( opt == -1 ) break;
+ if (opt == -1) break;
switch (opt) {
case 0:
break;
case 'h':
- help_flag = 1;
- break;
+ print_help();
+ return 0;
+
+ case 'V':
+ print_version(stdout, prog_name);
+ return 0;
case '?':
return 1;
}
}
- if (help_flag) {
- print_help();
- return 0;
- }
if (optind >= argc) {
fprintf(stderr, "A query sequence must be specified.\n");
query = (unsigned char*)argv[optind++];
query_len = strlen((char*)query);
- convert_sequence(query, query_len);
+ fastq_sw_conv_seq(query, query_len);
- sw_data = swStripedInit(query, query_len, mat);
+ sw = fastq_alloc_sw(query, query_len);
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- gzfin = gzdopen( fileno(stdin), "rb" );
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file 'stdin'.\n");
- return 1;
- }
-
- fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
- gzclose(gzfin);
+ fastq_match(stdin, stdout, sw);
}
else {
for (; optind < argc; optind++) {
continue;
}
- gzfin = gzdopen(fileno(fin), "rb");
- if (gzfin == NULL) {
- fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
- continue;
- }
-
- fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
-
- gzclose(gzfin);
+ fastq_match(fin, stdout, sw);
}
}
- swStripedComplete(sw_data);
+ fastq_free_sw(sw);
return 0;
}