From: Daniel Caleb Jones Date: Thu, 3 Mar 2011 20:06:42 +0000 (-0800) Subject: reimplemented smith-waterman X-Git-Url: https://git.donarmstrong.com/?p=fastq-tools.git;a=commitdiff_plain;h=7295fe6abb7eb2f5f25f3c82e9cbd3c389ae9370 reimplemented smith-waterman --- diff --git a/configure.ac b/configure.ac index 758f315..2681ec5 100644 --- a/configure.ac +++ b/configure.ac @@ -45,7 +45,6 @@ AC_CHECK_HEADER(getopt.h, , CXXFLAGS=$CFLAGS AC_CONFIG_FILES( [Makefile - src/Makefile - src/swsse2/Makefile] ) + src/Makefile] ) AC_OUTPUT diff --git a/src/Makefile.am b/src/Makefile.am index c50f420..e28add9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,15 +1,17 @@ -SUBDIRS = swsse2 - bin_PROGRAMS = fastq-grep fastq-kmers fastq-match -fastq_grep_SOURCES = kseq.h fastq-grep.c fastq-parse.c fastq-common.c +fastq_common_src=fastq-common.h fastq-common.c +fastq_parse_src=fastq-parse.h fastq-parse.c +fastq_sw_src=fastq-sw.h fastq-sw.c + +fastq_grep_SOURCES = fastq-grep.c $(fastq_common_src) $(fastq_parse_src) fastq_grep_LDADD = $(PCRE_LIBS) -lz -fastq_kmers_SOURCES = kseq.h fastq-kmers.c fastq-parse.c fastq-common.c +fastq_kmers_SOURCES = fastq-kmers.c $(fastq_common_src) $(fastq_parse_src) fastq_kmers_LDADD = -lz -fastq_match_SOURCES = kseq.h fastq-match.c fastq-parse.c fastq-common.c -fastq_match_LDADD = swsse2/libswsse2.la -lz -fastq_match_CFLAGS = -msse2 +fastq_match_SOURCES = fastq-match.c $(fastq_common_src) $(fastq_parse_src) $(fastq_sw_src) +fastq_match_LDADD = -lz + diff --git a/src/fastq-match.c b/src/fastq-match.c index 69eff70..7b55f07 100644 --- a/src/fastq-match.c +++ b/src/fastq-match.c @@ -12,10 +12,7 @@ #include "fastq-common.h" #include "fastq-parse.h" -#include "swsse2/blosum62.h" -#include "swsse2/swsse2.h" -#include "swsse2/matrix.h" -#include "swsse2/swstriped.h" +#include "fastq-sw.h" #include #include #include @@ -45,51 +42,22 @@ void print_help() } -void convert_sequence(unsigned char* s, int n) -{ - int i; - for (i = 0; i < n; i++) { - s[i] = (char)AMINO_ACID_VALUE[(int)s[i]]; - } -} void fastq_match(FILE* fin, FILE* fout, - SwStripedData* sw_data, - unsigned char* query, int n, - SEARCH_OPTIONS* options) + sw_t* sw, + unsigned char* query, int n) { int score; - int gap_init = -(options->gapInit + options->gapExt); - int gap_ext = -options->gapExt; - int threshold = options->threshold; - fastq_t* fqf = fastq_open(fin); seq_t* seq = fastq_alloc_seq(); while (fastq_next(fqf, seq)) { fprintf(fout, "%s\t", seq->seq.s); - convert_sequence((unsigned char*)seq->seq.s, seq->seq.n); - - score = swStripedByte(query, n, - (unsigned char*)seq->seq.s, seq->seq.n, - gap_init, gap_ext, - sw_data->pvbQueryProf, - sw_data->pvH1, - sw_data->pvH2, - sw_data->pvE, - sw_data->bias); - if (score >= 255) { - score = swStripedWord(query, n, - (unsigned char*)seq->seq.s, seq->seq.n, - gap_init, gap_ext, - sw_data->pvbQueryProf, - sw_data->pvH1, - sw_data->pvH2, - sw_data->pvE); - } + fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n); + score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n); fprintf(fout, "%d\n", score); } @@ -107,13 +75,8 @@ int main(int argc, char* argv[]) unsigned char* query; int query_len; - SwStripedData* sw_data; - signed char* mat = blosum62; - SEARCH_OPTIONS options; - options.gapInit = -10; - options.gapExt = -2; - options.threshold = -1; + sw_t* sw; FILE* fin; @@ -124,7 +87,9 @@ int main(int argc, char* argv[]) static struct option long_options[] = { - {"help", no_argument, &help_flag, 1}, + {"help", no_argument, &help_flag, 1}, + {"gap-init", required_argument, NULL, 0}, + {"gap-extend", required_argument, NULL, 0}, {0, 0, 0, 0} }; @@ -165,12 +130,12 @@ int main(int argc, char* argv[]) query = (unsigned char*)argv[optind++]; query_len = strlen((char*)query); - convert_sequence(query, query_len); + fastq_sw_conv_seq(query, query_len); - sw_data = swStripedInit(query, query_len, mat); + sw = fastq_alloc_sw(query, query_len); if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) { - fastq_match(stdin, stdout, sw_data, query, query_len, &options); + fastq_match(stdin, stdout, sw, query, query_len); } else { for (; optind < argc; optind++) { @@ -180,11 +145,11 @@ int main(int argc, char* argv[]) continue; } - fastq_match(fin, stdout, sw_data, query, query_len, &options); + fastq_match(fin, stdout, sw, query, query_len); } } - swStripedComplete(sw_data); + fastq_free_sw(sw); return 0; } diff --git a/src/fastq-sw.c b/src/fastq-sw.c new file mode 100644 index 0000000..275adb4 --- /dev/null +++ b/src/fastq-sw.c @@ -0,0 +1,179 @@ +/* + * This file is part of fastq-tools. + * + * Copyright (c) 2011 by Daniel C. Jones + * + * This is a very simple, 'fast enough' implementation of the Smith-Waterman + * algorithm specifically for short nucleotide sequences, working in O(mn) time + * and O(m) space, implemented according to the original Gotoh paper and + * Phil Green's implementation in cross_match. + * + * There is no fancy bit packing or vectorization, but such features would offer + * diminishing returns when aligning short sequences such as high throughput + * sequencing data. For example the Farrar SSE2 algorithm might be a tiny bit + * faster, but would diminish portability. + * + */ + + +#include "fastq-sw.h" +#include "fastq-common.h" +#include +#include + + +static const int sw_default_d[25] = + /* A C G T N */ + { 1, -2, -2, -2 , 0, + -2, 1, -2, -2, 0, + -2, -2, 1, -2, 0, + -2, -2, -2, 1, 0, + 0, 0, 0, 0, 0 }; + +static const int sw_mat50_d[25] = + { 2, -2, 0, -2 , 0, + -2, 2, -2, 0, 0, + 0, -2, 2, -2, 0, + -2, 0, -2, 2, 0, + 0, 0, 0, 0, 0 }; + +static const int sw_mat70_d[25] = + { 2, -2, -1, -2 , 0, + -2, 2, -2, -1, 0, + -1, -2, 2, -2, 0, + -2, -1, -2, 2, 0, + 0, 0, 0, 0, 0 }; + + +static inline int imax(int a, int b) +{ + return a > b ? a : b; +} + +static inline int imax4(int a, int b, int c, int d) +{ + return imax(imax(a,b), imax(c,d)); +} + + + +void fastq_sw_conv_seq(unsigned char* seq, int n) +{ + while (*seq || n--) { + switch (*seq) { + case 'A' : + case 'a': + case 'U': + case 'u': + *seq = 0; + break; + + case 'C': + case 'c': + *seq = 1; + break; + + case 'G': + case 'g': + *seq = 2; + break; + + case 'T': + case 't': + *seq = 3; + break; + + case 'N': + case 'n': + default: + *seq = 4; + } + + seq++; + } +} + + +sw_t* fastq_alloc_sw(const unsigned char* subject, int size) +{ + sw_t* sw = malloc_or_die(sizeof(sw_t)); + memcpy(sw->subject, subject, size); + + /* default cost matrix */ + memcpy(sw->d, sw_default_d, 25 * sizeof(int)); + + /* default gap costs */ + sw->gap_open = -4; + sw->gap_extend = -3; + + sw->work0 = malloc_or_die(size * sizeof(int)); + sw->work1 = malloc_or_die(size * sizeof(int)); + sw->size = size; + + return sw; +} + + +void fastq_free_sw(sw_t* sw) +{ + free(sw->subject); + free(sw->work0); + free(sw->work1); + free(sw); +} + + + +int fastq_sw(sw_t* sw, const unsigned char* x, int n) +{ + /*const int max_score = 65535;*/ + + /* conveniance */ + int* maxstu = sw->work0; + int* t = sw->work1; + int m = sw->size; + const int* d = sw->d; + int gap_op = sw->gap_open; + int gap_ex = sw->gap_extend; + unsigned char* y = sw->subject; + + int i, j; + + /* zero maxstu row */ + memset(maxstu, 0, m * sizeof(int)); + + /* initialize t row to a negative value to prohibit + * leading with gap extensions */ + for (j = 0; j < m; j++) t[j] = -1; + + int u, s; + int maxstu0; + + for (i = 0; i < n; i++) { + + /* special case for column 0 */ + t[j] = imax(maxstu[j] + gap_op, t[j] + gap_ex); + u = gap_op; + maxstu[0] = imax4(0, d[5 * y[j] + x[j]], t[j], u); + maxstu0 = 0; + + + for (j = 1; j < m; j++) { + t[j] = imax(maxstu[j] + gap_op, t[j] + gap_ex); + u = imax(maxstu[j-1] + gap_op, u + gap_ex); + s = maxstu0 + d[5 * y[j] + x[i]]; + maxstu0 = maxstu[j]; + + maxstu[j] = imax4(0, s, t[j], u); + } + } + + /* return maximum from maxstu */ + maxstu0 = 0; + for (j = 0; j + * + * fastq-sw : + * Local alignments of nucleotide sequences via Smith-Waterman. + * + */ + +#ifndef FASTQ_TOOLS_SW_H +#define FASTQ_TOOLS_SW_H + + +typedef struct +{ + unsigned char* subject; + int size; + + int d[25]; /* cost matrix */ + int gap_open; /* gap open */ + int gap_extend; /* gap extend */ + + /* matrix rows, used internally */ + int* work0; + int* work1; + +} sw_t; + +/* convert a n ASCII nucleotide sequence to one suitable for fastq_sw */ +void fastq_sw_conv_seq(unsigned char*, int n); + +sw_t* fastq_alloc_sw(const unsigned char *subject, int size); +void fastq_free_sw(sw_t*); + +int fastq_sw(sw_t*, const unsigned char* query, int size); + +#endif + diff --git a/src/swsse2/COPYRIGHT b/src/swsse2/COPYRIGHT deleted file mode 100644 index 5c63207..0000000 --- a/src/swsse2/COPYRIGHT +++ /dev/null @@ -1,19 +0,0 @@ - - Copyright 2006, by Michael Farrar. All rights reserved. The SWSSE2 - program and documentation may not be sold or incorporated into a - commercial product, in whole or in part, without written consent of - Michael Farrar. - - For further information regarding permission for use or reproduction, - please contact Michael Farrar at: - - farrar.michael@gmail.com - - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/src/swsse2/Makefile.am b/src/swsse2/Makefile.am deleted file mode 100644 index 99094b0..0000000 --- a/src/swsse2/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ - -noinst_LTLIBRARIES = libswsse2.la - -libswsse2_la_SOURCES = fastalib.h fastalib.c \ - matrix.h matrix.c \ - swscalar.h swscalar.c \ - swsse2.h swsse2.c \ - swstriped.h swstriped.c \ - swwozniak.h swwozniak.c \ - blosum62.h blosum62.c - - -libswsse2_la_CFLAGS = -msse2 - -EXTRADIST = README COPYRIGHT - diff --git a/src/swsse2/README b/src/swsse2/README deleted file mode 100644 index 11f01ac..0000000 --- a/src/swsse2/README +++ /dev/null @@ -1,100 +0,0 @@ -Introduction - -The Smith-Waterman [1] algorithm is one of the most sensitive sequencing -algorithms in use today. It is also the slowest due to the number of -calculations needed to perform the search. To speed up the algorithm, it -has been adapted to use Single Instruction Multiple Data, SIMD, instructions -found on many common microprocessors today. SIMD instructions are able to -perform the same operation on multiple pieces of data parallel. - -The program swsse2 introduces a new SIMD implementation of the Smith-Waterman -algorithm for the X86 processor. The weights are precomputed parallel to the -query sequence, like the Rognes [2] implementation, but are accessed in the -striped pattern. The new implementation reached speeds six times faster than -other SIMD implementations. - -Below is a graph comparing the total search times of 11 queries, 3806 residues, -against the Swiss-Prot 49.1 database, 75,841,138 residues. The tests were run -on a PC with a 2.00GHz Intel Xeon Core 2 Duo processor with 2 GB RAM. The -program is singlely threaded, so the number of cores has no affect on the run -times. The Wozniak, Rognes and striped implementations were run with the -scoring matrices BLOSUM50 and BLOSUM62 and four different gap penalties, 10-k, -10-2k, 14-2k and 40-2k. Since the Wozniak's runtime does not change depending -on the scoring matrix, one line is used for both scoring matrices. - -Build Instructions - - * Download the zip file with the swsse2 sources. - * Unzip the sources. - * Load the swsse2.vcproj file into Microsoft Visual C++ 2005. - * Build the project (F7). For optimized code, be sure to change the - configuration to a Release build. - * The swsse2.exe file is in the Release directory ready to be run. - -Running - -To run swsse2 three files must be provided, the scoring matrix, query sequence -and the database sequence. Four scoring matrices are provided with the -release, BLOSUM45, BLOSUM50, BLOSUM62 and BLOSUM80. The query sequence and -database sequence must be in the FASTA format. For example, to run with the -default gap penalties 10-2k, the scoring matrix BLOSUM50, the query sequence -ptest1.fasta and the sequence database db.fasta use: - - c:\swsse2>.\Release\swsse2.exe blosum50.mat ptest1.fasta db.fasta - ptest1.fasta vs db.fasta - Matrix: blosum50.mat, Init: -10, Ext: -2 - - Score Description - 53 108_LYCES Protein 108 precursor. - 53 10KD_VIGUN 10 kDa protein precursor (Clone PSAS10). - 32 1431_ECHGR 14-3-3 protein homolog 1. - 32 1431_ECHMU 14-3-3 protein homolog 1 (Emma14-3-3.1). - 27 110K_PLAKN 110 kDa antigen (PK110) (Fragment). - 26 1432_ECHGR 14-3-3 protein homolog 2. - 25 13S1_FAGES 13S globulin seed storage protein 1 - 25 13S3_FAGES 13S globulin seed storage protein 3 - 25 13S2_FAGES 13S globulin seed storage protein 2 - 23 12S1_ARATH 12S seed storage protein CRA1 - 22 13SB_FAGES 13S globulin basic chain. - 21 12AH_CLOS4 12-alpha-hydroxysteroid dehydrogenase - 21 140U_DROME RPII140-upstream protein. - 21 12S2_ARATH 12S seed storage protein CRB - 21 1431_LYCES 14-3-3 protein 1. - 20 1431_ARATH 14-3-3-like protein GF14 - - 21 residues in query string - 2014 residues in 25 library sequences - Scan time: 0.000 (Striped implementation) - -Options - - Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] [-c num] - matrix query db - - -h : this help message - -n : run a non-vectorized Smith-Waterman search - -w : run a vectorized Wozniak search - -r : run a vectorized Rognes search (NOT SUPPORTED) - -s : run a vectorized striped search (default) - -i num : gap init penalty (default -10) - -e num : gap extension penalty (default -2) - -t num : minimum score threshold (default 20) - -c num : number of scores to be displayed (default 250) - matrix : scoring matrix file - query : query sequence file (fasta format) - db : sequence database file (fasta format) - -Note - -The Rognes implementation is not released as part of the swsse2 package due to -patent concerns. - -References - -[1] Smith, T. F. and Waterman, M. S. (1981) Identification of common molecular subsequences. J. Mol. Biol., 147, 195-197. - -[2] Rognes, T. and Seeberg, E. (2000) Six-fold speed-up of the Smith-Waterman sequence database searches using parallel processing on common microprocessors. Bioinformatics, 16, 699-706. - - - - diff --git a/src/swsse2/blosum62.c b/src/swsse2/blosum62.c deleted file mode 100644 index e5a4aec..0000000 --- a/src/swsse2/blosum62.c +++ /dev/null @@ -1,32 +0,0 @@ - - -signed char blosum62[] = -{ - 4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1, - -2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1, - 0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3, - -2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1, - -1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4, - -2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3, - 0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2, - -2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0, - -1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3, - -1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1, - -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3, - -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1, - -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0, - -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1, - -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3, - -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0, - 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0, - 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1, - 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2, - -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3, - 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, - -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2, - -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4 -}; - - - - diff --git a/src/swsse2/blosum62.h b/src/swsse2/blosum62.h deleted file mode 100644 index c10e625..0000000 --- a/src/swsse2/blosum62.h +++ /dev/null @@ -1,9 +0,0 @@ - -#ifndef BLOSUM62_H_ -#define BLOSUM62_H_ - -extern signed char blosum62[]; - -#endif - - diff --git a/src/swsse2/fastalib.c b/src/swsse2/fastalib.c deleted file mode 100644 index fb2050a..0000000 --- a/src/swsse2/fastalib.c +++ /dev/null @@ -1,210 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#include -#include -#include - -#include "swsse2.h" -#include "fastalib.h" - -#define READ_BUFFER_SIZE (128 * 1024) -#define SEQ_NAME_SIZE (128) - -FASTA_LIB *openLib (char *file, int pad) -{ - FILE *fp; - - FASTA_LIB *lib; - - if ((fp = fopen (file, "r")) == NULL) { - fprintf (stderr, "Unable to open file %s\n", file); - exit (-1); - } - - lib = (FASTA_LIB *) malloc (sizeof (FASTA_LIB)); - if (!lib) { - fprintf (stderr, "Unable to allocate memory for library header\n"); - exit (-1); - } - - lib->readBuffer = (char *) malloc (READ_BUFFER_SIZE); - if (!lib->readBuffer) { - fprintf (stderr, "Unable to allocate memory for read buffer\n"); - exit (-1); - } - - lib->seqBuffer = (unsigned char *) malloc (MAX_SEQ_LENGTH); - if (!lib->seqBuffer) { - fprintf (stderr, "Unable to allocate memory for sequence\n"); - exit (-1); - } - - lib->seqName = (char *) malloc (SEQ_NAME_SIZE); - if (!lib->seqName) { - fprintf (stderr, "Unable to allocate memory for sequence name\n"); - exit (-1); - } - - lib->size = (int) fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp); - if (lib->size == 0 && !feof (fp)) { - fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp)); - exit (-1); - } - - lib->pos = 0; - - lib->fp = fp; - - lib->sequences = 0; - lib->residues = 0; - - lib->pad; - - return lib; -} - -static int -readNextBlock (FASTA_LIB *lib) -{ - FILE *fp = lib->fp; - size_t size; - - size = fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp); - if (lib->size == 0 && !feof (fp)) { - fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp)); - exit (-1); - } - - lib->pos = 0; - lib->size = (int) size; - - return lib->size; -} - -unsigned char * -nextSeq (FASTA_LIB *lib, int *length) -{ - int inx; - int size; - int done; - int len; - - char *name = lib->seqName; - unsigned char *seq = lib->seqBuffer; - - /* check if we are at the end of the library */ - if (lib->size == 0) { - *length = 0; - return NULL; - } - - if (lib->pos == lib->size) { - readNextBlock (lib); - } - - inx = lib->pos; - - /* check for the start of a sequence */ - if (lib->readBuffer[inx] != '>') { - fprintf (stderr, "Error parsing fasta file expecting > found %c\n", - lib->readBuffer[inx]); - exit (-1); - } - - ++inx; - - /* read in the sequence name */ - len = 0; - done = 0; - do { - if (inx >= lib->size) { - size = readNextBlock (lib); - if (size == 0) { - *length = 0; - return NULL; - } - inx = lib->pos; - } else if (lib->readBuffer[inx] == '\n') { - *name = '\0'; - done = 1; - } else if (len < SEQ_NAME_SIZE - 1) { - *name++ = lib->readBuffer[inx]; - len++; - } - ++inx; - } while (!done); - - lib->pos = inx; - - /* read in the sequence */ - len = 0; - done = 0; - do { - if (inx >= lib->size) { - size = readNextBlock (lib); - if (size == 0) { - *seq = '\0'; - done = 1; - } - inx = 0; - } else if (isspace(lib->readBuffer[inx])) { - ++inx; - } else if (lib->readBuffer[inx] == '>') { - *seq = '\0'; - done = 1; - } else if (len >= MAX_SEQ_LENGTH) { - fprintf (stderr, "Sequence %s exceeds maximum length\n", - lib->seqName); - exit (-1); - } else { - int value = AMINO_ACID_VALUE[lib->readBuffer[inx]]; - if (value == -1) { - fprintf (stderr, "Unknown amino acid %c in sequence %s\n", - lib->readBuffer[inx], lib->seqName); - exit (-1); - } - *seq++ = (char) value; - inx++; - len++; - } - } while (!done); - - lib->pos = inx; - *length = len; - - lib->sequences++; - lib->residues += len; - - /* check if we need to pad the sequence to a multiple of 16 */ - if (lib->pad) { - inx = 16 - (len % 16); - while (inx--) { - *seq++ = ALPHA_SIZE; - } - *seq = '\0'; - } - - return lib->seqBuffer; -} - -void closeLib (FASTA_LIB *lib) -{ - fclose (lib->fp); - - free (lib->readBuffer); - free (lib->seqBuffer); - free (lib->seqName); - - free (lib); -} diff --git a/src/swsse2/fastalib.h b/src/swsse2/fastalib.h deleted file mode 100644 index b6b43e3..0000000 --- a/src/swsse2/fastalib.h +++ /dev/null @@ -1,45 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#ifndef INCLUDE_FASTALIB_H -#define INCLUDE_FASTALIB_H - -#include - -#define MAX_SEQ_LENGTH (64 * 1024) - -typedef struct { - char *readBuffer; - - char *seqName; - unsigned char *seqBuffer; - - int pos; - int size; - - FILE *fp; - - int sequences; - int residues; - - int pad; -} FASTA_LIB; - -FASTA_LIB *openLib (char *file, int pad); -void closeLib (FASTA_LIB *lib); - -unsigned char *nextSeq (FASTA_LIB *lib, int *length); - -#define seqName(LIB) (LIB->seqName) - -#endif /* INCLUDE_FASTALIB_H */ diff --git a/src/swsse2/matrix.c b/src/swsse2/matrix.c deleted file mode 100644 index 480daf1..0000000 --- a/src/swsse2/matrix.c +++ /dev/null @@ -1,198 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#include -#include -#include - -#include "swsse2.h" -#include "matrix.h" - -#define BUF_SIZE 512 - -char *skipSpaces (char *line) -{ - while (isspace (*line)) { - ++line; - } - - return line; -} - -signed char *readMatrix (char *file) -{ - FILE *fp; - char line[BUF_SIZE]; - - signed char *matrix; - - int mark[ALPHA_SIZE]; - int order[ALPHA_SIZE]; - - int done; - int i; - - int errors = 0; - int count = 0; - - if ((fp = fopen (file, "r")) == NULL) { - fprintf (stderr, "Unable to open file %s\n", file); - exit (-1); - } - - matrix = (signed char *) malloc (ALPHA_SIZE * ALPHA_SIZE); - if (!matrix) { - fprintf (stderr, "Unable to allocate memory for scoring matrix\n"); - exit (-1); - } - - /* initialize the order and mark arrays */ - for (i = 0; i < ALPHA_SIZE; ++i) { - order[i] = -1; - mark[i] = -1; - } - - /* read the first line of the matrix giving the amino acid order */ - done = 0; - while (!done && fgets (line, BUF_SIZE, fp) != NULL) { - char *ptr = skipSpaces (line); - if (*ptr && *ptr != '#') { - - while (*ptr && *ptr != '#') { - int inx = AMINO_ACID_VALUE[*ptr]; - - if (inx == -1) { - fprintf (stderr, "Unknown amino acid %c in %s\n", *ptr, file); - ++errors; - } else if (mark[inx] != -1) { - fprintf (stderr, "Amino acid %c defined twice\n", *ptr); - ++errors; - } else if (count >= ALPHA_SIZE) { - /* this should not happen, but we will be safe */ - fprintf (stderr, "Too many amino acids %d\n", count); - ++errors; - } else { - order[count++] = inx; - mark[inx] = inx; - } - ptr = skipSpaces (ptr + 1); - } - - done = 1; - } - } - - /* make sure all amino acids are defined */ - for (i = 0; i < ALPHA_SIZE; ++i) { - if (order[i] < 0) { - fprintf (stderr, "Missing column for amino acid %c\n", - AMINO_ACIDS[i]); - ++errors; - } - mark[i] = -1; - } - - if (errors > 0) { - fprintf (stderr, "Terminating due to errors in matrix file\n"); - exit (-1); - } - - /* read the scores for the amino acids */ - while (fgets (line, BUF_SIZE, fp) != NULL) { - signed char *row; - char *ptr = skipSpaces (line); - if (*ptr && *ptr != '#') { - char aminoAcid = *ptr; - int inx = AMINO_ACID_VALUE[*ptr]; - if (inx == -1) { - fprintf (stderr, "Unknown amino acid %c in matrix\n", *ptr); - ++errors; - } else if (mark[inx] != -1) { - fprintf (stderr, "Row %c defined twice\n", *ptr); - ++errors; - } - - row = &matrix[inx * ALPHA_SIZE]; - - for (i = 0; i < ALPHA_SIZE; ++i) { - int sign = 1; - int num = 0; - - ptr = skipSpaces (ptr + 1); - - /* check the sign */ - if (*ptr == '-') { - sign = -1; - ++ptr; - } - - do { - if (*ptr >= '0' && *ptr <= '9') { - num = num * 10 + (*ptr - '0'); - ptr++; - } else { - char name[16]; - char *pName; - if (isspace (*ptr)) { - pName = "space"; - } else if (*ptr == 0) { - pName = "end of line"; - } else { - name[0] = *ptr; - name[1] = 0; - pName = name; - } - fprintf (stderr, "Row %c Expecting digit found %s\n", - aminoAcid, pName); - exit (-1); - } - } while (*ptr && !isspace (*ptr)); - - num = num * sign; - - if (num < -128 || num > 127) { - fprintf (stderr, "Weight %d out of range row %c\n", - aminoAcid, num); - ++errors; - num = 0; - } - - row[order[i]] = (char) num; - } - - if (i < ALPHA_SIZE) { - fprintf (stderr, "Amino acid row %c incomplete\n", aminoAcid); - ++errors; - } - - mark[inx] = 1; - } - } - - /* make sure all amino acids are defined */ - for (i = 0; i < ALPHA_SIZE; ++i) { - if (mark[i] < 0) { - fprintf (stderr, "Missing row for amino acid %c\n", - AMINO_ACIDS[i]); - ++errors; - } - } - - if (errors) { - fprintf (stderr, "Terminating due to errors in matrix %s\n", file); - } - - fclose (fp); - - return matrix; -} diff --git a/src/swsse2/matrix.h b/src/swsse2/matrix.h deleted file mode 100644 index fee912c..0000000 --- a/src/swsse2/matrix.h +++ /dev/null @@ -1,19 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#ifndef INCLUDE_MATRIX_H -#define INCLUDE_MATRIX_H - -signed char *readMatrix (char *file); - -#endif /* INCLUDE_MATRIX_H */ diff --git a/src/swsse2/swscalar.c b/src/swsse2/swscalar.c deleted file mode 100644 index c2eea43..0000000 --- a/src/swsse2/swscalar.c +++ /dev/null @@ -1,226 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#include -#include - -#include "swsse2.h" -#include "swscalar.h" - -typedef struct { - int *pMatrix; - int *pH; - int *pE; - int *pData; -} SwScalarData; - -int -swScalar (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - int *pMatrix, - int *pH, - int *pE); - -void * -swScalarInit(unsigned char *querySeq, - int queryLength, - signed char *matrix) -{ - int i, j; - int nCount; - - int *pi; - - SwScalarData *pSwData; - - /* remove unreferenced warnings */ - querySeq; - - pSwData = (SwScalarData *) malloc (sizeof (SwScalarData)); - if (!pSwData) { - fprintf (stderr, "Unable to allocate memory for SW data\n"); - exit (-1); - } - - nCount = ALPHA_SIZE * ALPHA_SIZE + /* matrix */ - (queryLength * 3); /* vH1, vH2 and vE */ - - pSwData->pData = (int *) calloc (nCount, sizeof (int)); - if (!pSwData->pData) { - fprintf (stderr, "Unable to allocate memory for SW data buffers\n"); - exit (-1); - } - - pSwData->pMatrix = pSwData->pData; - - pSwData->pH = pSwData->pMatrix + ALPHA_SIZE * ALPHA_SIZE; - pSwData->pE = pSwData->pH + queryLength; - - /* Conver the scoring matrix to ints */ - pi = pSwData->pMatrix; - for (i = 0; i < ALPHA_SIZE; ++i) { - for (j = 0; j < ALPHA_SIZE; ++j) { - *pi++ = (int) *matrix++; - } - } - - return pSwData; -} - -void swScalarScan (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores) -{ - int score; - - int threshold = options->threshold; - - unsigned char *dbSeq; - int dbLen; - - int gapInit = -(options->gapInit + options->gapExt); - int gapExt = -options->gapExt; - - SwScalarData *scalarData = (SwScalarData *) swData; - - dbSeq = nextSeq (dbLib, &dbLen); - while (dbLen > 0) { - - score = swScalar (querySeq, queryLength, - dbSeq, dbLen, - gapInit, gapExt, - scalarData->pMatrix, - scalarData->pH, - scalarData->pE); - - if (score >= threshold) { - int minScore = insertList (scores, score, seqName (dbLib)); - if (minScore >= threshold) { - threshold = minScore; - } - } - - dbSeq = nextSeq (dbLib, &dbLen); - } -} - -void -swScalarComplete(void *pSwData) -{ - SwScalarData *pScalarData = (SwScalarData *) pSwData; - - free (pScalarData->pData); - free (pScalarData); -} - -int -swScalar(unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - int *pMatrix, - int *pH, - int *pE) -{ - int i, j; - - int E, F, H; - int prevH; - - int maxScore = 0; - - int *pScore; - - /* Zero out the storage vector */ - for (i = 0; i < queryLength; i++) - { - *(pE + i) = 0; - *(pH + i) = 0; - } - - for (i = 0; i < dbLength; ++i) - { - /* fetch first data asap. */ - pScore = pMatrix + dbSeq[i] * ALPHA_SIZE; - - /* zero out F. */ - F = 0; - - /* load the next h value */ - prevH = 0; - - for (j = 0; j < queryLength; j++) - { - /* load E values */ - E = *(pE + j); - - /* add score to H */ - H = prevH + *(pScore + querySeq[j]); - if (H < 0) { - H = 0; - } - - /* Update highest score encountered this far */ - if (H > maxScore) { - maxScore = H; - } - - /* get max from H, E and F */ - if (E > H) { - H = E; - } - if (F > H) { - H = F; - } - - /* save H values */ - prevH = *(pH + j); - *(pH + j) = H; - - H = H - gapOpen; - - /* update E value */ - E = E - gapExtend; - if (H > E) { - E = H; - } - if (E < 0) { - E = 0; - } - - /* update vF value */ - F = F - gapExtend; - if (H > F) { - F = H; - } - if (F < 0) { - F = 0; - } - - /* save vE values */ - *(pE + j) = E; - } - } - - /* return largest score */ - return maxScore; -} diff --git a/src/swsse2/swscalar.h b/src/swsse2/swscalar.h deleted file mode 100644 index 2149712..0000000 --- a/src/swsse2/swscalar.h +++ /dev/null @@ -1,39 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#ifndef INCLUDE_SWSCALAR_H -#define INCLUDE_SWSCALAR_H - -#include "swsse2.h" -#include "fastalib.h" - -SW_DATA * -swScalarInit (unsigned char *querySeq, - int queryLength, - signed char *matrix); - - -void -swScalarScan (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores); - - -void -swScalarComplete (SW_DATA *pSwData); - - -#endif /* INCLUDE_SWSCALAR_H */ diff --git a/src/swsse2/swsse2.c b/src/swsse2/swsse2.c deleted file mode 100644 index d2827a3..0000000 --- a/src/swsse2/swsse2.c +++ /dev/null @@ -1,430 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#include -#include -#include - -#include -#include - -#include "swsse2.h" -#include "matrix.h" -#include "fastalib.h" - -#include "swscalar.h" -#include "swwozniak.h" -#ifdef WITH_ROGNES -#include "swrognes.h" -#endif -#include "swstriped.h" - -typedef enum { SCALAR, - WOZNIAK, -#ifdef WITH_ROGNES - ROGNES, -#endif - STRIPED -} SW_TYPES; - -const char *SW_IMPLEMENATION[] = { - "Non-optimized", - "Wozniak", -#ifdef WITH_ROGNES - "Rognes", -#endif - "Striped", -}; - -typedef struct { - SW_DATA *(*init) (unsigned char *querySeq, - int queryLength, - signed char *matrix); - void (*scan) (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores); - void (*done) (SW_DATA *pSwData); -} SW_FUNCT_DEFS; - -SW_FUNCT_DEFS swFuncs[] = { - { - swScalarInit, - swScalarScan, - swScalarComplete, - }, - { - swWozniakInit, - swWozniakScan, - swWozniakComplete, - }, -#ifdef WITH_ROGNES - { - swRognesInit, - swRognesScan, - swRognesComplete, - }, -#endif - { - swStripedInit, - swStripedScan, - swStripedComplete, - }, -}; - -const char AMINO_ACIDS[ALPHA_SIZE] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', - 'S', 'T', 'V', 'W', 'X', 'Y', 'Z' -}; - -const int AMINO_ACID_VALUE[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1, - 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1, - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1, - 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}; - -SCORE_LIST *initList (int count); -void freeList (SCORE_LIST *list); - -void printResults (SCORE_LIST *list); - -void printUsage (void) -{ - printf ("Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] " - "[-c num] matrix query db\n"); - printf (" -h : this help message\n"); - printf (" -n : run a non-vectorized Smith-Waterman search\n"); - printf (" -w : run a vectorized Wozniak search\n"); -#ifdef WITH_ROGNES - printf (" -r : run a vectorized Rognes search\n"); -#else - printf (" -r : run a vectorized Rognes search (NOT SUPPORTED)\n"); -#endif - printf (" -s : run a vectorized striped search (default)\n"); - printf (" -i num : gap init penalty (default -10)\n"); - printf (" -e num : gap extension penalty (default -2)\n"); - printf (" -t num : minimum score threshold (default 20)\n"); - printf (" -c num : number of scores to be displayed (default 250)\n"); - printf (" matrix : scoring matrix file\n"); - printf (" query : query sequence file (fasta format)\n"); - printf (" db : sequence database file (fasta format)\n"); -} - -#if 0 -int main (int argc, char **argv) -{ - int i; - - int penalty; - - int rptCount = 250; - - SW_TYPES swType = STRIPED; - - char *dbFile = NULL; - char *queryFile = NULL; - char *matrixFile = NULL; - - signed char *matrix; - - unsigned char *querySeq; - int queryLen; - - SCORE_LIST *list; - - FASTA_LIB *queryLib; - FASTA_LIB *dbLib; - - void *swData; - - struct _timeb startTime; - struct _timeb endTime; - - double startTimeSec; - double endTimeSec; - - SEARCH_OPTIONS options; - - if (argc < 4) { - printUsage (); - exit (-1); - } - - /* set the default options */ - options.gapInit = -10; - options.gapExt = -2; - options.threshold = 20; - - i = 1; - while (i < argc) { - if (i + 3 == argc) { - /* should be matrix file name */ - matrixFile = argv[i]; - - } else if (i + 2 == argc) { - /* should be query file name */ - queryFile = argv[i]; - - } else if (i + 1 == argc) { - /* should be matrix file name */ - dbFile = argv[i]; - - } else { - /* process arguements */ - switch (argv[i][1]) { - case 'h': - printUsage (); - break; - case 'n': - swType = SCALAR; - break; - case 'r': -#ifdef WITH_ROGNES - swType = ROGNES; -#else - fprintf (stderr, "The ROGNES implementation is not supported\n"); - exit (-1); -#endif - break; - case 'w': - swType = WOZNIAK; - break; - case 's': - swType = STRIPED; - break; - case 'i': - penalty = atoi (argv[++i]); - if (penalty > 0 || penalty < -128) { - fprintf (stderr, "Invalid gap init %d\n", penalty); - fprintf (stderr, "Valid range is 0 - -128\n"); - exit (-1); - } - options.gapInit = (unsigned short) penalty; - break; - case 'e': - penalty = atoi (argv[++i]); - if (penalty > 0 || penalty < -128) { - fprintf (stderr, "Invalid gap extension %d\n", penalty); - fprintf (stderr, "Valid range is 0 - -128\n"); - exit (-1); - } - options.gapExt = (unsigned short) penalty; - break; - case 't': - options.threshold = atoi (argv[++i]); - break; - case 'c': - rptCount = atoi (argv[++i]); - if (rptCount < 10) { - rptCount = 10; - } - break; - default: - fprintf (stderr, "Invalid option %s\n", argv[i]); - printUsage (); - exit (-1); - } - } - ++i; - } - - list = initList (rptCount); - - matrix = readMatrix (matrixFile); - if (matrix == NULL) { - fprintf (stderr, "Error reading matrix\n"); - exit (-1); - } - - dbLib = openLib (dbFile, swType == WOZNIAK); - queryLib = openLib (queryFile, 0); - - querySeq = nextSeq (queryLib, &queryLen); - if (queryLen == 0) { - fprintf (stderr, "Empty query sequence\n"); - exit (-1); - } - - printf ("%s vs %s\n", queryFile, dbFile); - printf ("Matrix: %s, Init: %d, Ext: %d\n\n", - matrixFile, options.gapInit, options.gapExt); - - _ftime(&startTime); - - swData = (swFuncs[swType].init) (querySeq, queryLen, matrix); - - (swFuncs[swType].scan) (querySeq, queryLen, dbLib, swData, &options, list); - - (swFuncs[swType].done) (swData); - - _ftime(&endTime); - - printResults (list); - - printf ("\n"); - printf ("%d residues in query string\n", queryLen); - printf ("%d residues in %d library sequences\n", - dbLib->residues, dbLib->sequences); - - startTimeSec = startTime.time + startTime.millitm / 1000.0; - endTimeSec = endTime.time + endTime.millitm / 1000.0; - printf ("Scan time: %6.3f (%s implementation)\n", - endTimeSec - startTimeSec, - SW_IMPLEMENATION[swType]); - - closeLib (queryLib); - closeLib (dbLib); - - free (matrix); - - freeList (list); -} -#endif - -SCORE_LIST * -initList (int count) -{ - int i; - - SCORE_LIST *hdr; - SCORE_NODE *list; - SCORE_NODE *prev; - - hdr = (SCORE_LIST *) malloc (sizeof (SCORE_LIST)); - if (hdr == NULL) { - fprintf (stderr, "Cannot allocate storage for score header\n"); - exit (-1); - } - - list = (SCORE_NODE *) calloc (count, sizeof (SCORE_NODE)); - if (list == NULL) { - fprintf (stderr, "Cannot allocate storage for scores\n"); - exit (-1); - } - - /* initialize the scores list */ - hdr->minScore = 0; - hdr->first = NULL; - hdr->last = NULL; - hdr->free = list; - hdr->buffer = list; - - prev = NULL; - for (i = 0; i < count; ++i) { - list[i].name[0] = '\0'; - list[i].score = 0; - - if (i == 0) { - list[i].prev = NULL; - } else { - list[i].prev = &list[i-1]; - } - - if (i == count - 1) { - list[i].next = NULL; - } else { - list[i].next = &list[i+1]; - } - } - - return hdr; -} - -void freeList (SCORE_LIST *list) -{ - free (list->buffer); - free (list); -} - -int insertList (SCORE_LIST *list, int score, char *name) -{ - SCORE_NODE *node; - SCORE_NODE *ptr = list->first; - - if (list->free != NULL) { - node = list->free; - list->free = list->free->next; - } else if (score > list->last->score) { - node = list->last; - list->last = node->prev; - list->last->next = NULL; - } else { - /* should never happen */ - return list->minScore + 1; - } - - strncpy (node->name, name, MAX_SCORE_NAME); - node->name[MAX_SCORE_NAME - 1] = '\0'; - node->score = score; - - while (ptr && ptr->score >= score) { - ptr = ptr->next; - } - - if (list->first == NULL) { - list->first = node; - list->last = node; - node->prev = NULL; - node->next = NULL; - } else if (ptr == NULL) { - node->prev = list->last; - node->next = NULL; - node->prev->next = node; - list->last = node; - } else { - node->prev = ptr->prev; - node->next = ptr; - - if (node->prev == NULL) { - list->first = node; - } else { - node->prev->next = node; - } - ptr->prev = node; - } - - if (list->free == NULL) { - list->minScore = list->last->score + 1; - } - - return list->minScore; -} - -void printResults (SCORE_LIST *list) -{ - SCORE_NODE *ptr = list->first; - - printf ("Score Description\n"); - - while (ptr) { - printf ("%5d %s\n", ptr->score, ptr->name); - ptr = ptr->next; - } -} - diff --git a/src/swsse2/swsse2.h b/src/swsse2/swsse2.h deleted file mode 100644 index b488e14..0000000 --- a/src/swsse2/swsse2.h +++ /dev/null @@ -1,50 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#ifndef INCLUDE_SWSSE2_H -#define INCLUDE_SWSSE2_H - -typedef void SW_DATA; - -#define ALPHA_SIZE 23 - -extern const char AMINO_ACIDS[ALPHA_SIZE]; -extern const int AMINO_ACID_VALUE[256]; - -#define SHORT_BIAS 32768 - -typedef struct { - short gapInit; - short gapExt; - int threshold; -} SEARCH_OPTIONS; - -#define MAX_SCORE_NAME 64 -typedef struct SCORE_STRUCT { - int score; - char name[MAX_SCORE_NAME]; - struct SCORE_STRUCT *prev; - struct SCORE_STRUCT *next; -} SCORE_NODE; - -typedef struct { - int minScore; - SCORE_NODE *first; - SCORE_NODE *last; - SCORE_NODE *free; - void *buffer; -} SCORE_LIST; - -int insertList (SCORE_LIST *list, int score, char *name); - -#endif /* INCLUDE_SWSSE2_H */ diff --git a/src/swsse2/swstriped.c b/src/swsse2/swstriped.c deleted file mode 100644 index 51dd4c2..0000000 --- a/src/swsse2/swstriped.c +++ /dev/null @@ -1,573 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#include -#include -#include - -#include "swsse2.h" -#include "swstriped.h" - -void * -swStripedInit(unsigned char *querySeq, - int queryLength, - signed char *matrix) -{ - int i, j, k; - - int segSize; - int nCount; - - int bias; - - int lenQryByte; - int lenQryShort; - - int weight; - - short *ps; - char *pc; - - signed char *matrixRow; - - size_t aligned; - - SwStripedData *pSwData; - - lenQryByte = (queryLength + 15) / 16; - lenQryShort = (queryLength + 7) / 8; - - pSwData = (SwStripedData *) malloc (sizeof (SwStripedData)); - if (!pSwData) { - fprintf (stderr, "Unable to allocate memory for SW data\n"); - exit (-1); - } - - nCount = 64 + /* slack bytes */ - lenQryByte * ALPHA_SIZE + /* query profile byte */ - lenQryShort * ALPHA_SIZE + /* query profile short */ - (lenQryShort * 3); /* vH1, vH2 and vE */ - - pSwData->pData = (unsigned char *) calloc (nCount, sizeof (__m128i)); - if (!pSwData->pData) { - fprintf (stderr, "Unable to allocate memory for SW data buffers\n"); - exit (-1); - } - - /* since we might port this to another platform, lets align the data */ - /* to 16 byte boundries ourselves */ - aligned = ((size_t) pSwData->pData + 15) & ~(0x0f); - - pSwData->pvbQueryProf = (__m128i *) aligned; - pSwData->pvsQueryProf = pSwData->pvbQueryProf + lenQryByte * ALPHA_SIZE; - - pSwData->pvH1 = pSwData->pvsQueryProf + lenQryShort * ALPHA_SIZE; - pSwData->pvH2 = pSwData->pvH1 + lenQryShort; - pSwData->pvE = pSwData->pvH2 + lenQryShort; - - /* Use a scoring profile for the SSE2 implementation, but the layout - * is a bit strange. The scoring profile is parallel to the query, but is - * accessed in a stripped pattern. The query is divided into equal length - * segments. The number of segments is equal to the number of elements - * processed in the SSE2 register. For 8-bit calculations, the query will - * be divided into 16 equal length parts. If the query is not long enough - * to fill the last segment, it will be filled with neutral weights. The - * first element in the SSE register will hold a value from the first segment, - * the second element of the SSE register will hold a value from the - * second segment and so on. So if the query length is 288, then each - * segment will have a length of 18. So the first 16 bytes will have - * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will - * have the following weights: Q2, Q20, Q38, ... Q272; and so on until - * all parts of all segments have been written. The last seqment will - * have the following weights: Q18, Q36, Q54, ... Q288. This will be - * done for the entire alphabet. - */ - - /* Find the bias to use in the substitution matrix */ - bias = 127; - for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) { - if (matrix[i] < bias) { - bias = matrix[i]; - } - } - if (bias > 0) { - bias = 0; - } - - /* Fill in the byte query profile */ - pc = (char *) pSwData->pvbQueryProf; - segSize = (queryLength + 15) / 16; - nCount = segSize * 16; - for (i = 0; i < ALPHA_SIZE; ++i) { - matrixRow = matrix + i * ALPHA_SIZE; - for (j = 0; j < segSize; ++j) { - for (k = j; k < nCount; k += segSize) { - if (k >= queryLength) { - weight = 0; - } else { - weight = matrixRow[*(querySeq + k)]; - } - *pc++ = (char) (weight - bias); - } - } - } - - /* Fill in the short query profile */ - ps = (short *) pSwData->pvsQueryProf; - segSize = (queryLength + 7) / 8; - nCount = segSize * 8; - for (i = 0; i < ALPHA_SIZE; ++i) { - matrixRow = matrix + i * ALPHA_SIZE; - for (j = 0; j < segSize; ++j) { - for (k = j; k < nCount; k += segSize) { - if (k >= queryLength) { - weight = 0; - } else { - weight = matrixRow[*(querySeq + k)]; - } - *ps++ = (unsigned short) weight; - } - } - } - - pSwData->bias = (unsigned short) -bias; - - return pSwData; -} - -void swStripedScan (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores) -{ - int score; - - int threshold = options->threshold; - - unsigned char *dbSeq; - int dbLen; - - int gapInit = -(options->gapInit + options->gapExt); - int gapExt = -options->gapExt; - - SwStripedData *stripedData = (SwStripedData *) swData; - - dbSeq = nextSeq (dbLib, &dbLen); - while (dbLen > 0) { - - score = swStripedByte (querySeq, queryLength, - dbSeq, dbLen, - gapInit, gapExt, - stripedData->pvbQueryProf, - stripedData->pvH1, - stripedData->pvH2, - stripedData->pvE, - stripedData->bias); - - /* check if needs a run with higher precision */ - if (score >= 255) { - score = swStripedWord (querySeq, queryLength, - dbSeq, dbLen, - gapInit, gapExt, - stripedData->pvsQueryProf, - stripedData->pvH1, - stripedData->pvH2, - stripedData->pvE); - } - - if (score >= threshold) { - int minScore = insertList (scores, score, seqName (dbLib)); - if (minScore >= threshold) { - threshold = minScore; - } - } - - dbSeq = nextSeq (dbLib, &dbLen); - } -} - -void -swStripedComplete(void *pSwData) -{ - SwStripedData *pStripedData = (SwStripedData *) pSwData; - - free (pStripedData->pData); - free (pStripedData); -} - -int -swStripedWord(unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - __m128i *pvQueryProf, - __m128i *pvHLoad, - __m128i *pvHStore, - __m128i *pvE) -{ - int i, j; - int score; - - int cmp; - int iter = (queryLength + 7) / 8; - - __m128i *pv; - - __m128i vE, vF, vH; - - __m128i vMaxScore; - __m128i vGapOpen; - __m128i vGapExtend; - - __m128i vMin; - __m128i vMinimums; - __m128i vTemp; - - __m128i *pvScore; - - /* remove unreferenced warning */ - querySeq; - - /* Load gap opening penalty to all elements of a constant */ - vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0); - vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0); - vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0); - - /* Load gap extension penalty to all elements of a constant */ - vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0); - vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0); - vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0); - - /* load vMaxScore with the zeros. since we are using signed */ - /* math, we will bias the maxscore to -32768 so we have the */ - /* full range of the short. */ - vMaxScore = _mm_cmpeq_epi16 (vMaxScore, vMaxScore); - vMaxScore = _mm_slli_epi16 (vMaxScore, 15); - - vMinimums = _mm_shuffle_epi32 (vMaxScore, 0); - - vMin = _mm_shuffle_epi32 (vMaxScore, 0); - vMin = _mm_srli_si128 (vMin, 14); - - /* Zero out the storage vector */ - for (i = 0; i < iter; i++) - { - _mm_store_si128 (pvE + i, vMaxScore); - _mm_store_si128 (pvHStore + i, vMaxScore); - } - - for (i = 0; i < dbLength; ++i) - { - /* fetch first data asap. */ - pvScore = pvQueryProf + dbSeq[i] * iter; - - /* zero out F. */ - vF = _mm_cmpeq_epi16 (vF, vF); - vF = _mm_slli_epi16 (vF, 15); - - /* load the next h value */ - vH = _mm_load_si128 (pvHStore + iter - 1); - vH = _mm_slli_si128 (vH, 2); - vH = _mm_or_si128 (vH, vMin); - - pv = pvHLoad; - pvHLoad = pvHStore; - pvHStore = pv; - - for (j = 0; j < iter; j++) - { - /* load values of vF and vH from previous row (one unit up) */ - vE = _mm_load_si128 (pvE + j); - - /* add score to vH */ - vH = _mm_adds_epi16 (vH, *pvScore++); - - /* Update highest score encountered this far */ - vMaxScore = _mm_max_epi16 (vMaxScore, vH); - - /* get max from vH, vE and vF */ - vH = _mm_max_epi16 (vH, vE); - vH = _mm_max_epi16 (vH, vF); - - /* save vH values */ - _mm_store_si128 (pvHStore + j, vH); - - /* update vE value */ - vH = _mm_subs_epi16 (vH, vGapOpen); - vE = _mm_subs_epi16 (vE, vGapExtend); - vE = _mm_max_epi16 (vE, vH); - - /* update vF value */ - vF = _mm_subs_epi16 (vF, vGapExtend); - vF = _mm_max_epi16 (vF, vH); - - /* save vE values */ - _mm_store_si128 (pvE + j, vE); - - /* load the next h value */ - vH = _mm_load_si128 (pvHLoad + j); - } - - /* reset pointers to the start of the saved data */ - j = 0; - vH = _mm_load_si128 (pvHStore + j); - - /* the computed vF value is for the given column. since */ - /* we are at the end, we need to shift the vF value over */ - /* to the next column. */ - vF = _mm_slli_si128 (vF, 2); - vF = _mm_or_si128 (vF, vMin); - vTemp = _mm_subs_epi16 (vH, vGapOpen); - vTemp = _mm_cmpgt_epi16 (vF, vTemp); - cmp = _mm_movemask_epi8 (vTemp); - while (cmp != 0x0000) - { - vE = _mm_load_si128 (pvE + j); - - vH = _mm_max_epi16 (vH, vF); - - /* save vH values */ - _mm_store_si128 (pvHStore + j, vH); - - /* update vE incase the new vH value would change it */ - vH = _mm_subs_epi16 (vH, vGapOpen); - vE = _mm_max_epi16 (vE, vH); - _mm_store_si128 (pvE + j, vE); - - /* update vF value */ - vF = _mm_subs_epi16 (vF, vGapExtend); - - j++; - if (j >= iter) - { - j = 0; - vF = _mm_slli_si128 (vF, 2); - vF = _mm_or_si128 (vF, vMin); - } - - vH = _mm_load_si128 (pvHStore + j); - - vTemp = _mm_subs_epi16 (vH, vGapOpen); - vTemp = _mm_cmpgt_epi16 (vF, vTemp); - cmp = _mm_movemask_epi8 (vTemp); - } - } - - /* find largest score in the vMaxScore vector */ - vTemp = _mm_srli_si128 (vMaxScore, 8); - vMaxScore = _mm_max_epi16 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 4); - vMaxScore = _mm_max_epi16 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 2); - vMaxScore = _mm_max_epi16 (vMaxScore, vTemp); - - /* store in temporary variable */ - score = (short) _mm_extract_epi16 (vMaxScore, 0); - - /* return largest score */ - return score + SHORT_BIAS; -} - - - - -int -swStripedByte(unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - __m128i *pvQueryProf, - __m128i *pvHLoad, - __m128i *pvHStore, - __m128i *pvE, - unsigned short bias) -{ - int i, j; - int score; - - int dup; - int cmp; - int iter = (queryLength + 15) / 16; - - __m128i *pv; - - __m128i vE, vF, vH; - - __m128i vMaxScore; - __m128i vBias; - __m128i vGapOpen; - __m128i vGapExtend; - - __m128i vTemp; - __m128i vZero; - - __m128i *pvScore; - - /* remove unreferenced warning */ - querySeq; - - /* Load the bias to all elements of a constant */ - dup = (bias << 8) | (bias & 0x00ff); - vBias = _mm_insert_epi16 (vBias, dup, 0); - vBias = _mm_shufflelo_epi16 (vBias, 0); - vBias = _mm_shuffle_epi32 (vBias, 0); - - /* Load gap opening penalty to all elements of a constant */ - dup = (gapOpen << 8) | (gapOpen & 0x00ff); - vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0); - vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0); - vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0); - - /* Load gap extension penalty to all elements of a constant */ - dup = (gapExtend << 8) | (gapExtend & 0x00ff); - vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0); - vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0); - vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0); - - vMaxScore = _mm_xor_si128 (vMaxScore, vMaxScore); - - vZero = _mm_xor_si128 (vZero, vZero); - - /* Zero out the storage vector */ - for (i = 0; i < iter; i++) - { - _mm_store_si128 (pvE + i, vMaxScore); - _mm_store_si128 (pvHStore + i, vMaxScore); - } - - for (i = 0; i < dbLength; ++i) - { - /* fetch first data asap. */ - pvScore = pvQueryProf + dbSeq[i] * iter; - - /* zero out F. */ - vF = _mm_xor_si128 (vF, vF); - - /* load the next h value */ - vH = _mm_load_si128 (pvHStore + iter - 1); - vH = _mm_slli_si128 (vH, 1); - - pv = pvHLoad; - pvHLoad = pvHStore; - pvHStore = pv; - - for (j = 0; j < iter; j++) - { - /* load values of vF and vH from previous row (one unit up) */ - vE = _mm_load_si128 (pvE + j); - - /* add score to vH */ - vH = _mm_adds_epu8 (vH, *(pvScore + j)); - vH = _mm_subs_epu8 (vH, vBias); - - /* Update highest score encountered this far */ - vMaxScore = _mm_max_epu8 (vMaxScore, vH); - - /* get max from vH, vE and vF */ - vH = _mm_max_epu8 (vH, vE); - vH = _mm_max_epu8 (vH, vF); - - /* save vH values */ - _mm_store_si128 (pvHStore + j, vH); - - /* update vE value */ - vH = _mm_subs_epu8 (vH, vGapOpen); - vE = _mm_subs_epu8 (vE, vGapExtend); - vE = _mm_max_epu8 (vE, vH); - - /* update vF value */ - vF = _mm_subs_epu8 (vF, vGapExtend); - vF = _mm_max_epu8 (vF, vH); - - /* save vE values */ - _mm_store_si128 (pvE + j, vE); - - /* load the next h value */ - vH = _mm_load_si128 (pvHLoad + j); - } - - /* reset pointers to the start of the saved data */ - j = 0; - vH = _mm_load_si128 (pvHStore + j); - - /* the computed vF value is for the given column. since */ - /* we are at the end, we need to shift the vF value over */ - /* to the next column. */ - vF = _mm_slli_si128 (vF, 1); - vTemp = _mm_subs_epu8 (vH, vGapOpen); - vTemp = _mm_subs_epu8 (vF, vTemp); - vTemp = _mm_cmpeq_epi8 (vTemp, vZero); - cmp = _mm_movemask_epi8 (vTemp); - - while (cmp != 0xffff) - { - vE = _mm_load_si128 (pvE + j); - - vH = _mm_max_epu8 (vH, vF); - - /* save vH values */ - _mm_store_si128 (pvHStore + j, vH); - - /* update vE incase the new vH value would change it */ - vH = _mm_subs_epu8 (vH, vGapOpen); - vE = _mm_max_epu8 (vE, vH); - _mm_store_si128 (pvE + j, vE); - - /* update vF value */ - vF = _mm_subs_epu8 (vF, vGapExtend); - - j++; - if (j >= iter) - { - j = 0; - vF = _mm_slli_si128 (vF, 1); - } - - vH = _mm_load_si128 (pvHStore + j); - - vTemp = _mm_subs_epu8 (vH, vGapOpen); - vTemp = _mm_subs_epu8 (vF, vTemp); - vTemp = _mm_cmpeq_epi8 (vTemp, vZero); - cmp = _mm_movemask_epi8 (vTemp); - } - } - - /* find largest score in the vMaxScore vector */ - vTemp = _mm_srli_si128 (vMaxScore, 8); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 4); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 2); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 1); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - - /* store in temporary variable */ - score = _mm_extract_epi16 (vMaxScore, 0); - score = score & 0x00ff; - - /* check if we might have overflowed */ - if (score + bias >= 255) - { - score = 255; - } - - /* return largest score */ - return score; -} diff --git a/src/swsse2/swstriped.h b/src/swsse2/swstriped.h deleted file mode 100644 index 3cab0ba..0000000 --- a/src/swsse2/swstriped.h +++ /dev/null @@ -1,78 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#ifndef INCLUDE_SWSTRIPED_H -#define INCLUDE_SWSTRIPED_H - -#include - -#include "swsse2.h" -#include "fastalib.h" - -SW_DATA * -swStripedInit (unsigned char *querySeq, - int queryLength, - signed char *matrix); - - -void -swStripedScan (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores); - - -void -swStripedComplete (SW_DATA *pSwData); - -int -swStripedWord (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - __m128i *queryProf, - __m128i *pvH1, - __m128i *pvH2, - __m128i *pvE); - - -int -swStripedByte (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - __m128i *queryProf, - __m128i *pvH1, - __m128i *pvH2, - __m128i *pvE, - unsigned short bias); - -typedef struct { - __m128i *pvbQueryProf; - __m128i *pvsQueryProf; - __m128i *pvH1; - __m128i *pvH2; - __m128i *pvE; - unsigned char *pData; - unsigned short bias; -} SwStripedData; - - - -#endif /* INCLUDE_SWSTRIPED_H */ diff --git a/src/swsse2/swwozniak.c b/src/swsse2/swwozniak.c deleted file mode 100644 index 4ae1206..0000000 --- a/src/swsse2/swwozniak.c +++ /dev/null @@ -1,708 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -/* Implementation of the Wozniak "vertical" vectorization - strategy for Smith-Waterman comparison, Wozniak (1997) Comp. - Appl. Biosci. 13:145-150 -*/ - -#include -#include -#include - -#include "swsse2.h" -#include "swwozniak.h" - -#define MATRIX_ROW_SIZE 32 -#define MATRIX_SIZE (MATRIX_ROW_SIZE * (ALPHA_SIZE + 1)) - -typedef struct { - char *pbMatrix; - short *psMatrix; - __m128i *pvHStore; - __m128i *pvEStore; - unsigned char *pData; - unsigned short bias; -} SwWozniakData; - -int -swWozniakWord (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - short *pMatrix, - __m128i *pvHStore, - __m128i *pvEStore); - -int -swWozniakByte (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - char *pMatrix, - __m128i *pvHStore, - __m128i *pvEStore, - unsigned short bias); - -void * -swWozniakInit(unsigned char *querySeq, - int queryLength, - signed char *matrix) -{ - int i, j; - - int nCount; - - int lenQryByte; - int lenQryShort; - - int bias; - - int weight; - - short *ps; - char *pc; - - signed char *matrixRow; - - size_t aligned; - - SwWozniakData *pSwData; - - lenQryByte = (queryLength + 15) / 16 + 2; - lenQryShort = (queryLength + 7) / 8 + 2; - - pSwData = (SwWozniakData *) malloc (sizeof (SwWozniakData)); - if (!pSwData) { - fprintf (stderr, "Unable to allocate memory for SW data\n"); - exit (-1); - } - - nCount = 64 + /* slack bytes */ - 4 * (ALPHA_SIZE + 1) + /* byte matrix */ - 4 * (ALPHA_SIZE + 1) + /* short matrix */ - ((queryLength + 16) * 2); /* vH and vE */ - - - pSwData->pData = (unsigned char *) calloc (nCount, sizeof (__m128i)); - if (!pSwData->pData) { - fprintf (stderr, "Unable to allocate memory for SW data buffers\n"); - exit (-1); - } - - pSwData->pbMatrix = (char *) pSwData->pData; - pSwData->psMatrix = (short *) (pSwData->pbMatrix + MATRIX_SIZE); - - /* since we might port this to another platform, lets align the data */ - /* to 16 byte boundries ourselves */ - aligned = (size_t) (pSwData->psMatrix + MATRIX_SIZE); - aligned = (aligned + 15) & ~(0x0f); - - pSwData->pvHStore = (__m128i *) aligned; - pSwData->pvEStore = pSwData->pvHStore + queryLength + 16; - - /* Find the bias to use in the substitution matrix */ - bias = 127; - for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) { - if (matrix[i] < bias) { - bias = matrix[i]; - } - } - if (bias > 0) { - bias = 0; - } - - pc = pSwData->pbMatrix; - ps = pSwData->psMatrix; - - for (i = 0; i < ALPHA_SIZE; i++) { - matrixRow = matrix + i * ALPHA_SIZE; - - for (j = 0; j < ALPHA_SIZE; j++) { - weight = matrixRow[j]; - *pc++ = weight - bias; - *ps++ = weight; - } - - for ( ; j < MATRIX_ROW_SIZE; j++) { - *pc++ = -bias; - *ps++ = 0; - } - } - - /* add the weights for the NULL rows */ - for (j = 0; j < MATRIX_ROW_SIZE; j++) { - *pc++ = -bias; - *ps++ = 0; - } - - pSwData->bias = (unsigned short) -bias; - - return pSwData; -} - -void swWozniakScan (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores) -{ - int score; - - int threshold = options->threshold; - - unsigned char *dbSeq; - int dbLen; - - int gapInit = -(options->gapInit + options->gapExt); - int gapExt = -options->gapExt; - - SwWozniakData *wozniakData = (SwWozniakData *) swData; - - dbSeq = nextSeq (dbLib, &dbLen); - while (dbLen > 0) { - - score = swWozniakByte (querySeq, queryLength, - dbSeq, dbLen, - gapInit, gapExt, - wozniakData->pbMatrix, - wozniakData->pvHStore, - wozniakData->pvEStore, - wozniakData->bias); - - /* check if needs a run with higher precision */ - if (score >= 255) { - score = swWozniakWord (querySeq, queryLength, - dbSeq, dbLen, - gapInit, gapExt, - wozniakData->psMatrix, - wozniakData->pvHStore, - wozniakData->pvEStore); - } - - if (score >= threshold) { - int minScore = insertList (scores, score, seqName (dbLib)); - if (minScore >= threshold) { - threshold = minScore; - } - } - - dbSeq = nextSeq (dbLib, &dbLen); - } -} - -void -swWozniakComplete(void *pSwData) -{ - SwWozniakData *pWozniakData = (SwWozniakData *) pSwData; - - free (pWozniakData->pData); - free (pWozniakData); -} - - -int -swWozniakWord (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - short *pMatrix, - __m128i *pvHStore, - __m128i *pvEStore) -{ - int i, j, k, l, m; - int score; - - short *pScore; - - __m128i vE, vF, vH; - __m128i vEUp, vHUp1, vHUp2; - - __m128i vMaxScore; - __m128i vGapOpen; - __m128i vGapExtend; - __m128i vScore; - - __m128i vMin; - __m128i vMinimums; - __m128i vTemp; - - /* remove unreferenced warning */ - querySeq; - - /* Load gap opening penalty to all elements of a constant */ - vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0); - vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0); - vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0); - - /* Load gap extension penalty to all elements of a constant */ - vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0); - vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0); - vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0); - - /* load vMaxScore with the zeros. since we are using signed */ - /* math, we will bias the maxscore to -32768 so we have the */ - /* full range of the short. */ - vMaxScore = _mm_cmpeq_epi16 (vMaxScore, vMaxScore); - vMaxScore = _mm_slli_epi16 (vMaxScore, 15); - - vMinimums = _mm_shuffle_epi32 (vMaxScore, 0); - - vMin = _mm_shuffle_epi32 (vMaxScore, 0); - vMin = _mm_srli_si128 (vMin, 14); - - for (i = 0; i < queryLength + 8; i++) - { - _mm_store_si128 (pvEStore + i, vMaxScore); - _mm_store_si128 (pvHStore + i, vMaxScore); - } - - pScore = (short *) &vScore; - - for (i = 0; i < dbLength; i += 8, dbSeq += 8) - { - /* zero lots of stuff. */ - vE = _mm_shuffle_epi32 (vMinimums, 0); - vF = _mm_shuffle_epi32 (vMinimums, 0); - vH = _mm_shuffle_epi32 (vMinimums, 0); - vHUp2 = _mm_shuffle_epi32 (vMinimums, 0); - - vScore = _mm_xor_si128 (vScore, vScore); - - for (j = 0; j < 8; ++j) - { - for (k = 0; k <= j; ++k) { - int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE; - pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k)); - } - for ( ; k < 8; ++k) { - pScore[k] = 0; - } - - /* load values of vE and vH from previous row (one unit up) */ - vEUp = _mm_load_si128 (pvEStore + j); - vHUp1 = _mm_load_si128 (pvHStore + j); - - /* shift into place so we have complete vE and vH vectors */ - /* that refer to the values one unit up from each cell */ - /* that we are currently working on. */ - vTemp = _mm_slli_si128 (vE, 2); - vEUp = _mm_srli_si128 (vEUp, 14); - vEUp = _mm_or_si128 (vEUp, vTemp); - - vTemp = _mm_slli_si128 (vH, 2); - vHUp1 = _mm_srli_si128 (vHUp1, 14); - vHUp1 = _mm_or_si128 (vHUp1, vTemp); - - /* do the dynamic programming */ - - /* update vE value */ - vE = _mm_subs_epi16 (vEUp, vGapExtend); - vTemp = _mm_subs_epi16 (vHUp1, vGapOpen); - vE = _mm_max_epi16 (vE, vTemp); - - /* update vF value */ - vF = _mm_subs_epi16 (vF, vGapExtend); - vTemp = _mm_subs_epi16 (vH, vGapOpen); - vF = _mm_max_epi16 (vF, vTemp); - - /* add score to vH */ - vH = _mm_adds_epi16 (vHUp2, vScore); - - /* set vH to max of vH, vE, vF */ - vH = _mm_max_epi16 (vH, vE); - vH = _mm_max_epi16 (vH, vF); - - /* Save value to use for next diagonal vH */ - vHUp2 = vHUp1; - - /* Update highest score encountered this far */ - vMaxScore = _mm_max_epi16 (vMaxScore, vH); - } - - for (l = 0; j < queryLength; ++j, ++l) - { - for (k = 0; k < 8; ++k) { - int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE; - pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k)); - } - - /* load values of vE and vH from previous row (one unit up) */ - vEUp = _mm_load_si128 (pvEStore + j); - vHUp1 = _mm_load_si128 (pvHStore + j); - - /* save old values of vE and vH to use on next row */ - _mm_store_si128 (pvEStore + l, vE); - _mm_store_si128 (pvHStore + l, vH); - - /* shift into place so we have complete vE and vH vectors */ - /* that refer to the values one unit up from each cell */ - /* that we are currently working on. */ - vTemp = _mm_slli_si128 (vE, 2); - vEUp = _mm_srli_si128 (vEUp, 14); - vEUp = _mm_or_si128 (vEUp, vTemp); - - vTemp = _mm_slli_si128 (vH, 2); - vHUp1 = _mm_srli_si128 (vHUp1, 14); - vHUp1 = _mm_or_si128 (vHUp1, vTemp); - - /* do the dynamic programming */ - - /* update vE value */ - vE = _mm_subs_epi16 (vEUp, vGapExtend); - vTemp = _mm_subs_epi16 (vHUp1, vGapOpen); - vE = _mm_max_epi16 (vE, vTemp); - - /* update vF value */ - vF = _mm_subs_epi16 (vF, vGapExtend); - vTemp = _mm_subs_epi16 (vH, vGapOpen); - vF = _mm_max_epi16 (vF, vTemp); - - /* add score to vH */ - vH = _mm_adds_epi16(vHUp2, vScore); - - /* set vH to max of vH, vE, vF */ - vH = _mm_max_epi16 (vH, vE); - vH = _mm_max_epi16 (vH, vF); - - /* Save value to use for next diagonal vH */ - vHUp2 = vHUp1; - - /* Update highest score encountered this far */ - vMaxScore = _mm_max_epi16 (vMaxScore, vH); - } - - for (m = 0 ; m < 7; ++j, ++l, ++m) - { - for (k = 0; k <= m; ++k) { - pScore[k] = 0; - } - for ( ; k < 8; ++k) { - int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE; - pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k)); - } - - /* save old values of vE and vH to use on next row */ - _mm_store_si128 (pvEStore + l, vE); - _mm_store_si128 (pvHStore + l, vH); - - /* v_score_load contains all zeros */ - vTemp = _mm_slli_si128 (vE, 2); - vEUp = _mm_or_si128 (vMin, vTemp); - vTemp = _mm_slli_si128 (vH, 2); - vHUp1 = _mm_or_si128 (vMin, vTemp); - - /* do the dynamic programming */ - - /* update vE value */ - vE = _mm_subs_epi16 (vEUp, vGapExtend); - vTemp = _mm_subs_epi16 (vHUp1, vGapOpen); - vE = _mm_max_epi16 (vE, vTemp); - - /* update vF value */ - vF = _mm_subs_epi16 (vF, vGapExtend); - vTemp = _mm_subs_epi16 (vH, vGapOpen); - vF = _mm_max_epi16 (vF, vTemp); - - /* add score to vH */ - vH = _mm_adds_epi16 (vHUp2, vScore); - - /* set vH to max of vH, vE, vF */ - vH = _mm_max_epi16 (vH, vE); - vH = _mm_max_epi16 (vH, vF); - - /* Save value to use for next diagonal vH */ - vHUp2 = vHUp1; - - /* Update highest score encountered this far */ - vMaxScore = _mm_max_epi16(vMaxScore,vH); - } - - _mm_store_si128 (pvEStore + l, vE); - _mm_store_si128 (pvHStore + l, vH); - } - - /* find largest score in the vMaxScore vector */ - vTemp = _mm_srli_si128 (vMaxScore, 8); - vMaxScore = _mm_max_epi16 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 4); - vMaxScore = _mm_max_epi16 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 2); - vMaxScore = _mm_max_epi16 (vMaxScore, vTemp); - - /* store in temporary variable */ - score = (short) _mm_extract_epi16 (vMaxScore, 0); - - /* return largest score */ - return score + SHORT_BIAS; -} - - - - -int -swWozniakByte (unsigned char *querySeq, - int queryLength, - unsigned char *dbSeq, - int dbLength, - unsigned short gapOpen, - unsigned short gapExtend, - char *pMatrix, - __m128i *pvHStore, - __m128i *pvEStore, - unsigned short bias) -{ - int i, j, k, l, m; - int score; - - int dup; - - char *pScore; - - __m128i vE, vF, vH; - __m128i vEUp, vHUp1, vHUp2; - - __m128i vMaxScore; - __m128i vBias; - __m128i vGapOpen; - __m128i vGapExtend; - __m128i vScore; - - __m128i vTemp; - - /* remove unreferenced warning */ - querySeq; - - /* Load the bias to all elements of a constant */ - dup = (bias << 8) | (bias & 0x00ff); - vBias = _mm_insert_epi16 (vBias, dup, 0); - vBias = _mm_shufflelo_epi16 (vBias, 0); - vBias = _mm_shuffle_epi32 (vBias, 0); - - /* Load gap opening penalty to all elements of a constant */ - dup = (gapOpen << 8) | (gapOpen & 0x00ff); - vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0); - vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0); - vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0); - - /* Load gap extension penalty to all elements of a constant */ - dup = (gapExtend << 8) | (gapExtend & 0x00ff); - vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0); - vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0); - vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0); - - vScore = _mm_xor_si128 (vScore, vScore); - vMaxScore = _mm_xor_si128 (vMaxScore, vMaxScore); - - for (i = 0; i < queryLength + 16; i++) - { - _mm_store_si128 (pvEStore + i, vMaxScore); - _mm_store_si128 (pvHStore + i, vMaxScore); - } - - pScore = (char *) &vScore; - - for (i = 0; i < dbLength; i += 16, dbSeq += 16) - { - // zero lots of stuff. - vE = _mm_xor_si128 (vE, vE); - vF = _mm_xor_si128 (vF, vF); - vH = _mm_xor_si128 (vH, vH); - vHUp2 = _mm_xor_si128 (vHUp2, vHUp2); - - vScore = _mm_xor_si128 (vScore, vScore); - - for (j = 0; j < 16; ++j) - { - for (k = 0; k <= j; ++k) { - int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE; - pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k)); - } - for ( ; k < 16; ++k) { - pScore[k] = (char) bias; - } - - // load values of vE and vH from previous row (one unit up) - vEUp = _mm_load_si128 (pvEStore + j); - vHUp1 = _mm_load_si128 (pvHStore + j); - - // shift into place so we have complete vE and vH vectors - // that refer to the values one unit up from each cell - // that we are currently working on. - vTemp = _mm_slli_si128 (vE, 1); - vEUp = _mm_srli_si128 (vEUp, 15); - vEUp = _mm_or_si128 (vEUp, vTemp); - - vTemp = _mm_slli_si128 (vH, 1); - vHUp1 = _mm_srli_si128 (vHUp1, 15); - vHUp1 = _mm_or_si128 (vHUp1, vTemp); - - // do the dynamic programming - - // update vE value - vE = _mm_subs_epu8 (vEUp, vGapExtend); - vTemp = _mm_subs_epu8 (vHUp1, vGapOpen); - vE = _mm_max_epu8 (vE, vTemp); - - // update vF value - vF = _mm_subs_epu8 (vF, vGapExtend); - vTemp = _mm_subs_epu8 (vH, vGapOpen); - vF = _mm_max_epu8 (vF, vTemp); - - // add score to vH - vH = _mm_adds_epu8 (vHUp2, *((__m128i *) pScore)); - vH = _mm_subs_epu8 (vH, vBias); - - // set vH to max of vH, vE, vF - vH = _mm_max_epu8 (vH, vE); - vH = _mm_max_epu8 (vH, vF); - - // Save value to use for next diagonal vH - vHUp2 = vHUp1; - - // Update highest score encountered this far - vMaxScore = _mm_max_epu8 (vMaxScore, vH); - } - - for (l = 0; j < queryLength; ++j, ++l) - { - for (k = 0; k < 16; ++k) { - int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE; - pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k)); - } - - // load values of vE and vH from previous row (one unit up) - vEUp = _mm_load_si128 (pvEStore + j); - vHUp1 = _mm_load_si128 (pvHStore + j); - - // save old values of vE and vH to use on next row - _mm_store_si128 (pvEStore + l, vE); - _mm_store_si128 (pvHStore + l, vH); - - // shift into place so we have complete vE and vH vectors - // that refer to the values one unit up from each cell - // that we are currently working on. - vTemp = _mm_slli_si128 (vE, 1); - vEUp = _mm_srli_si128 (vEUp, 15); - vEUp = _mm_or_si128 (vEUp, vTemp); - - vTemp = _mm_slli_si128 (vH, 1); - vHUp1 = _mm_srli_si128 (vHUp1, 15); - vHUp1 = _mm_or_si128 (vHUp1, vTemp); - - // do the dynamic programming - - // update vE value - vE = _mm_subs_epu8 (vEUp, vGapExtend); - vTemp = _mm_subs_epu8 (vHUp1, vGapOpen); - vE = _mm_max_epu8 (vE, vTemp); - - // update vF value - vF = _mm_subs_epu8 (vF, vGapExtend); - vTemp = _mm_subs_epu8 (vH, vGapOpen); - vF = _mm_max_epu8 (vF, vTemp); - - // add score to vH - vH = _mm_adds_epu8(vHUp2, vScore); - vH = _mm_subs_epu8 (vH, vBias); - - // set vH to max of vH, vE, vF - vH = _mm_max_epu8 (vH, vE); - vH = _mm_max_epu8 (vH, vF); - - // Save value to use for next diagonal vH - vHUp2 = vHUp1; - - // Update highest score encountered this far - vMaxScore = _mm_max_epu8 (vMaxScore, vH); - } - - for (m = 0 ; m < 15; ++j, ++l, ++m) - { - for (k = 0; k <= m; ++k) { - pScore[k] = (char) bias; - } - for ( ; k < 16; ++k) { - int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE; - pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k)); - } - - // save old values of vE and vH to use on next row - _mm_store_si128 (pvEStore + l, vE); - _mm_store_si128 (pvHStore + l, vH); - - // v_score_load contains all zeros - vEUp = _mm_slli_si128 (vE, 1); - vHUp1 = _mm_slli_si128 (vH, 1); - - // do the dynamic programming - - // update vE value - vE = _mm_subs_epu8 (vEUp, vGapExtend); - vTemp = _mm_subs_epu8 (vHUp1, vGapOpen); - vE = _mm_max_epu8 (vE, vTemp); - - // update vF value - vF = _mm_subs_epu8 (vF, vGapExtend); - vTemp = _mm_subs_epu8 (vH, vGapOpen); - vF = _mm_max_epu8 (vF, vTemp); - - // add score to vH - vH = _mm_adds_epu8 (vHUp2, vScore); - vH = _mm_subs_epu8 (vH, vBias); - - // set vH to max of vH, vE, vF - vH = _mm_max_epu8 (vH, vE); - vH = _mm_max_epu8 (vH, vF); - - // Save value to use for next diagonal vH - vHUp2 = vHUp1; - - // Update highest score encountered this far - vMaxScore = _mm_max_epu8(vMaxScore,vH); - } - - _mm_store_si128 (pvEStore + l, vE); - _mm_store_si128 (pvHStore + l, vH); - } - - // find largest score in the vMaxScore vector - vTemp = _mm_srli_si128 (vMaxScore, 8); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 4); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 2); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - vTemp = _mm_srli_si128 (vMaxScore, 1); - vMaxScore = _mm_max_epu8 (vMaxScore, vTemp); - - // store in temporary variable - score = (short) _mm_extract_epi16 (vMaxScore, 0); - score = score & 0x00ff; - - // check if we might have overflowed - if (score + bias >= 255) - { - score = 255; - } - - - // return largest score - return score; -} diff --git a/src/swsse2/swwozniak.h b/src/swsse2/swwozniak.h deleted file mode 100644 index 6c5ed4c..0000000 --- a/src/swsse2/swwozniak.h +++ /dev/null @@ -1,40 +0,0 @@ -/****************************************************************** - Copyright 2006 by Michael Farrar. All rights reserved. - This program may not be sold or incorporated into a commercial product, - in whole or in part, without written consent of Michael Farrar. For - further information regarding permission for use or reproduction, please - contact: Michael Farrar at farrar.michael@gmail.com. -*******************************************************************/ - -/* - Written by Michael Farrar, 2006. - Please send bug reports and/or suggestions to farrar.michael@gmail.com. -*/ - -#ifndef INCLUDE_SWWOZNIAK_H -#define INCLUDE_SWWOZNIAK_H - -#include "swsse2.h" -#include "fastalib.h" - -#define MATRIX_ROW_SIZE 32 - -SW_DATA * -swWozniakInit (unsigned char *querySeq, - int queryLength, - signed char *matrix); - - -void -swWozniakScan (unsigned char *querySeq, - int queryLength, - FASTA_LIB *dbLib, - void *swData, - SEARCH_OPTIONS *options, - SCORE_LIST *scores); - -void -swWozniakComplete (SW_DATA *pSwData); - - -#endif /* INCLUDE_SWWOZNIAK_H */