AC_DEFINE(_FILE_OFFSET_BITS, 64)
+AC_PROG_LIBTOOL
# check zlib
AX_CHECK_ZLIB
CXXFLAGS=$CFLAGS
AC_CONFIG_FILES( [Makefile
- src/Makefile] )
+ src/Makefile
+ src/swsse2/Makefile] )
AC_OUTPUT
-bin_PROGRAMS = fastq-grep fastq-kmers
+SUBDIRS = swsse2
+
+bin_PROGRAMS = fastq-grep fastq-kmers fastq-match
fastq_grep_SOURCES = kseq.h fastq-grep.c
fastq_grep_LDADD = $(PCRE_LIBS) -lz
fastq_kmers_SOURCES = kseq.h fastq-kmers.c
fastq_kmers_LDADD = -lz
+fastq_match_SOURCES = kseq.h fastq-match.c
+fastq_match_LDADD = swsse2/libswsse2.la -lz
+fastq_match_CFLAGS = -msse2
"Options:\n"
" -h, --help print this message\n"
" -v, --invert-match select nonmatching entries\n"
+" -c, --count output only the number of matching sequences\n"
);
}
static int invert_flag;
static int help_flag;
-static int zout_flag;
+static int count_flag;
{
int rc;
int ovector[3];
+ size_t count = 0;
kseq_t* seq;
seq = kseq_init(fin);
3 ); /* output vector length */
if ((invert_flag && rc == PCRE_ERROR_NOMATCH) || rc >= 0) {
- print_fastq_entry( fout, seq );
+ if (count_flag) count++;
+ else print_fastq_entry(fout, seq);
}
}
kseq_destroy(seq);
+
+ if (count_flag) fprintf(fout, "%zu\n", count);
}
int main(int argc, char* argv[])
{
+ SET_BINARY_MODE(stdin);
+ SET_BINARY_MODE(stdout);
+
const char* pat;
pcre* re;
const char* pat_error;
gzFile gzfin;
- invert_flag = 0;
- help_flag = 0;
- zout_flag = 0;
+ invert_flag = 0;
+ help_flag = 0;
+ count_flag = 0;
int opt;
int opt_idx;
{
{"help", no_argument, &help_flag, 1},
{"invert-match", no_argument, &invert_flag, 1},
+ {"count", no_argument, &count_flag, 1},
{0, 0, 0, 0}
};
while (1) {
- opt = getopt_long(argc, argv, "hv", long_options, &opt_idx);
+ opt = getopt_long(argc, argv, "hvc", long_options, &opt_idx);
if( opt == -1 ) break;
invert_flag = 1;
break;
+ case 'c':
+ count_flag = 1;
+ break;
+
case '?':
return 1;
int main(int argc, char* argv[])
{
+ SET_BINARY_MODE(stdin);
+ SET_BINARY_MODE(stdout);
+
help_flag = 0;
k = 1;
--- /dev/null
+
+/* Smith-Waterman alignments against sequences within a fastq file.
+ *
+ * Daniel Jones <dcjones@cs.washington.edu>
+ */
+
+#include <stdlib.h>
+#include <zlib.h>
+#include <getopt.h>
+
+#include "swsse2/blosum62.h"
+#include "swsse2/swsse2.h"
+#include "swsse2/matrix.h"
+#include "swsse2/swstriped.h"
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+# include <fcntl.h>
+# include <io.h>
+# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
+#endif
+
+
+static int help_flag;
+
+
+void print_help()
+{
+ fprintf( stderr,
+"fastq-match [OPTION]... QUERY [FILE]...\n"
+"Perform Smith-Waterman local alignment of a query sequence\n"
+"against each sequence in a fastq file.\n\n"
+"Options:\n"
+" -h, --help print this message\n"
+ );
+}
+
+
+void convert_sequence(unsigned char* s, int n)
+{
+ int i;
+ for (i = 0; i < n; i++) {
+ s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
+ }
+}
+
+
+void fastq_match(gzFile fin, FILE* fout,
+ SwStripedData* sw_data,
+ unsigned char* query, int n,
+ SEARCH_OPTIONS* options)
+{
+ int score;
+
+ int gap_init = -(options->gapInit + options->gapExt);
+ int gap_ext = -options->gapExt;
+ int threshold = options->threshold;
+
+ kseq_t* seq;
+ seq = kseq_init(fin);
+
+ while (kseq_read(seq) >= 0) {
+ fprintf(fout, "%s\t", seq->seq.s);
+
+ convert_sequence((unsigned char*)seq->seq.s, seq->seq.l);
+
+ score = swStripedByte(query, n,
+ (unsigned char*)seq->seq.s, seq->seq.l,
+ gap_init, gap_ext,
+ sw_data->pvbQueryProf,
+ sw_data->pvH1,
+ sw_data->pvH2,
+ sw_data->pvE,
+ sw_data->bias);
+ if (score >= 255) {
+ score = swStripedWord(query, n,
+ (unsigned char*)seq->seq.s, seq->seq.l,
+ gap_init, gap_ext,
+ sw_data->pvbQueryProf,
+ sw_data->pvH1,
+ sw_data->pvH2,
+ sw_data->pvE);
+ }
+
+ fprintf(fout, "%d\n", score);
+ }
+
+ kseq_destroy(seq);
+}
+
+
+
+int main(int argc, char* argv[])
+{
+ SET_BINARY_MODE(stdin);
+ SET_BINARY_MODE(stdout);
+
+ unsigned char* query;
+ int query_len;
+ SwStripedData* sw_data;
+ signed char* mat = blosum62;
+ SEARCH_OPTIONS options;
+
+ options.gapInit = -10;
+ options.gapExt = -2;
+ options.threshold = -1;
+
+ FILE* fin;
+ gzFile gzfin;
+
+ help_flag = 0;
+
+ int opt;
+ int opt_idx;
+
+ static struct option long_options[] =
+ {
+ {"help", no_argument, &help_flag, 1},
+ {0, 0, 0, 0}
+ };
+
+
+ while (1) {
+ opt = getopt_long(argc, argv, "h", long_options, &opt_idx);
+
+ if( opt == -1 ) break;
+
+ switch (opt) {
+ case 0:
+ if (long_options[opt_idx].flag != 0) break;
+ if (optarg) {
+ }
+ break;
+
+ case 'h':
+ help_flag = 1;
+ break;
+
+ case '?':
+ return 1;
+
+ default:
+ abort();
+ }
+ }
+
+ if (help_flag) {
+ print_help();
+ return 0;
+ }
+
+ if (optind >= argc) {
+ fprintf(stderr, "A query sequence must be specified.\n");
+ return 1;
+ }
+
+ query = (unsigned char*)argv[optind++];
+ query_len = strlen((char*)query);
+ convert_sequence(query, query_len);
+
+ sw_data = swStripedInit(query, query_len, mat);
+
+ if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
+ gzfin = gzdopen( fileno(stdin), "rb" );
+ if (gzfin == NULL) {
+ fprintf(stderr, "Malformed file 'stdin'.\n");
+ return 1;
+ }
+
+ fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
+
+ gzclose(gzfin);
+ }
+ else {
+ for (; optind < argc; optind++) {
+ fin = fopen(argv[optind], "rb");
+ if (fin == NULL) {
+ fprintf(stderr, "No such file '%s'.\n", argv[optind]);
+ continue;
+ }
+
+ gzfin = gzdopen(fileno(fin), "rb");
+ if (gzfin == NULL) {
+ fprintf(stderr, "Malformed file '%s'.\n", argv[optind]);
+ continue;
+ }
+
+ fastq_match(gzfin, stdout, sw_data, query, query_len, &options);
+
+ gzclose(gzfin);
+ }
+ }
+
+ swStripedComplete(sw_data);
+
+ return 0;
+}
+
+
+
--- /dev/null
+
+ Copyright 2006, by Michael Farrar. All rights reserved. The SWSSE2
+ program and documentation may not be sold or incorporated into a
+ commercial product, in whole or in part, without written consent of
+ Michael Farrar.
+
+ For further information regarding permission for use or reproduction,
+ please contact Michael Farrar at:
+
+ farrar.michael@gmail.com
+
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
--- /dev/null
+
+noinst_LTLIBRARIES = libswsse2.la
+
+libswsse2_la_SOURCES = fastalib.h fastalib.c \
+ matrix.h matrix.c \
+ swscalar.h swscalar.c \
+ swsse2.h swsse2.c \
+ swstriped.h swstriped.c \
+ swwozniak.h swwozniak.c \
+ blosum62.h blosum62.c
+
+
+libswsse2_la_CFLAGS = -msse2
+
+EXTRADIST = README COPYRIGHT
+
--- /dev/null
+Introduction
+
+The Smith-Waterman [1] algorithm is one of the most sensitive sequencing
+algorithms in use today. It is also the slowest due to the number of
+calculations needed to perform the search. To speed up the algorithm, it
+has been adapted to use Single Instruction Multiple Data, SIMD, instructions
+found on many common microprocessors today. SIMD instructions are able to
+perform the same operation on multiple pieces of data parallel.
+
+The program swsse2 introduces a new SIMD implementation of the Smith-Waterman
+algorithm for the X86 processor. The weights are precomputed parallel to the
+query sequence, like the Rognes [2] implementation, but are accessed in the
+striped pattern. The new implementation reached speeds six times faster than
+other SIMD implementations.
+
+Below is a graph comparing the total search times of 11 queries, 3806 residues,
+against the Swiss-Prot 49.1 database, 75,841,138 residues. The tests were run
+on a PC with a 2.00GHz Intel Xeon Core 2 Duo processor with 2 GB RAM. The
+program is singlely threaded, so the number of cores has no affect on the run
+times. The Wozniak, Rognes and striped implementations were run with the
+scoring matrices BLOSUM50 and BLOSUM62 and four different gap penalties, 10-k,
+10-2k, 14-2k and 40-2k. Since the Wozniak's runtime does not change depending
+on the scoring matrix, one line is used for both scoring matrices.
+
+Build Instructions
+
+ * Download the zip file with the swsse2 sources.
+ * Unzip the sources.
+ * Load the swsse2.vcproj file into Microsoft Visual C++ 2005.
+ * Build the project (F7). For optimized code, be sure to change the
+ configuration to a Release build.
+ * The swsse2.exe file is in the Release directory ready to be run.
+
+Running
+
+To run swsse2 three files must be provided, the scoring matrix, query sequence
+and the database sequence. Four scoring matrices are provided with the
+release, BLOSUM45, BLOSUM50, BLOSUM62 and BLOSUM80. The query sequence and
+database sequence must be in the FASTA format. For example, to run with the
+default gap penalties 10-2k, the scoring matrix BLOSUM50, the query sequence
+ptest1.fasta and the sequence database db.fasta use:
+
+ c:\swsse2>.\Release\swsse2.exe blosum50.mat ptest1.fasta db.fasta
+ ptest1.fasta vs db.fasta
+ Matrix: blosum50.mat, Init: -10, Ext: -2
+
+ Score Description
+ 53 108_LYCES Protein 108 precursor.
+ 53 10KD_VIGUN 10 kDa protein precursor (Clone PSAS10).
+ 32 1431_ECHGR 14-3-3 protein homolog 1.
+ 32 1431_ECHMU 14-3-3 protein homolog 1 (Emma14-3-3.1).
+ 27 110K_PLAKN 110 kDa antigen (PK110) (Fragment).
+ 26 1432_ECHGR 14-3-3 protein homolog 2.
+ 25 13S1_FAGES 13S globulin seed storage protein 1
+ 25 13S3_FAGES 13S globulin seed storage protein 3
+ 25 13S2_FAGES 13S globulin seed storage protein 2
+ 23 12S1_ARATH 12S seed storage protein CRA1
+ 22 13SB_FAGES 13S globulin basic chain.
+ 21 12AH_CLOS4 12-alpha-hydroxysteroid dehydrogenase
+ 21 140U_DROME RPII140-upstream protein.
+ 21 12S2_ARATH 12S seed storage protein CRB
+ 21 1431_LYCES 14-3-3 protein 1.
+ 20 1431_ARATH 14-3-3-like protein GF14
+
+ 21 residues in query string
+ 2014 residues in 25 library sequences
+ Scan time: 0.000 (Striped implementation)
+
+Options
+
+ Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] [-c num]
+ matrix query db
+
+ -h : this help message
+ -n : run a non-vectorized Smith-Waterman search
+ -w : run a vectorized Wozniak search
+ -r : run a vectorized Rognes search (NOT SUPPORTED)
+ -s : run a vectorized striped search (default)
+ -i num : gap init penalty (default -10)
+ -e num : gap extension penalty (default -2)
+ -t num : minimum score threshold (default 20)
+ -c num : number of scores to be displayed (default 250)
+ matrix : scoring matrix file
+ query : query sequence file (fasta format)
+ db : sequence database file (fasta format)
+
+Note
+
+The Rognes implementation is not released as part of the swsse2 package due to
+patent concerns.
+
+References
+
+[1] Smith, T. F. and Waterman, M. S. (1981) Identification of common molecular subsequences. J. Mol. Biol., 147, 195-197.
+
+[2] Rognes, T. and Seeberg, E. (2000) Six-fold speed-up of the Smith-Waterman sequence database searches using parallel processing on common microprocessors. Bioinformatics, 16, 699-706.
+
+
+
+
--- /dev/null
+
+
+signed char blosum62[] =
+{
+ 4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1,
+ -2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1,
+ 0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3,
+ -2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1,
+ -1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4,
+ -2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3,
+ 0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2,
+ -2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0,
+ -1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3,
+ -1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1,
+ -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3,
+ -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1,
+ -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0,
+ -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1,
+ -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3,
+ -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0,
+ 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0,
+ 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1,
+ 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2,
+ -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3,
+ 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1,
+ -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2,
+ -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4
+};
+
+
+
+
--- /dev/null
+
+#ifndef BLOSUM62_H_
+#define BLOSUM62_H_
+
+extern signed char blosum62[];
+
+#endif
+
+
--- /dev/null
+/******************************************************************
+ Copyright 2006 by Michael Farrar. All rights reserved.
+ This program may not be sold or incorporated into a commercial product,
+ in whole or in part, without written consent of Michael Farrar. For
+ further information regarding permission for use or reproduction, please
+ contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+
+/*
+ Written by Michael Farrar, 2006.
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "swsse2.h"
+#include "fastalib.h"
+
+#define READ_BUFFER_SIZE (128 * 1024)
+#define SEQ_NAME_SIZE (128)
+
+FASTA_LIB *openLib (char *file, int pad)
+{
+ FILE *fp;
+
+ FASTA_LIB *lib;
+
+ if ((fp = fopen (file, "r")) == NULL) {
+ fprintf (stderr, "Unable to open file %s\n", file);
+ exit (-1);
+ }
+
+ lib = (FASTA_LIB *) malloc (sizeof (FASTA_LIB));
+ if (!lib) {
+ fprintf (stderr, "Unable to allocate memory for library header\n");
+ exit (-1);
+ }
+
+ lib->readBuffer = (char *) malloc (READ_BUFFER_SIZE);
+ if (!lib->readBuffer) {
+ fprintf (stderr, "Unable to allocate memory for read buffer\n");
+ exit (-1);
+ }
+
+ lib->seqBuffer = (unsigned char *) malloc (MAX_SEQ_LENGTH);
+ if (!lib->seqBuffer) {
+ fprintf (stderr, "Unable to allocate memory for sequence\n");
+ exit (-1);
+ }
+
+ lib->seqName = (char *) malloc (SEQ_NAME_SIZE);
+ if (!lib->seqName) {
+ fprintf (stderr, "Unable to allocate memory for sequence name\n");
+ exit (-1);
+ }
+
+ lib->size = (int) fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp);
+ if (lib->size == 0 && !feof (fp)) {
+ fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp));
+ exit (-1);
+ }
+
+ lib->pos = 0;
+
+ lib->fp = fp;
+
+ lib->sequences = 0;
+ lib->residues = 0;
+
+ lib->pad;
+
+ return lib;
+}
+
+static int
+readNextBlock (FASTA_LIB *lib)
+{
+ FILE *fp = lib->fp;
+ size_t size;
+
+ size = fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp);
+ if (lib->size == 0 && !feof (fp)) {
+ fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp));
+ exit (-1);
+ }
+
+ lib->pos = 0;
+ lib->size = (int) size;
+
+ return lib->size;
+}
+
+unsigned char *
+nextSeq (FASTA_LIB *lib, int *length)
+{
+ int inx;
+ int size;
+ int done;
+ int len;
+
+ char *name = lib->seqName;
+ unsigned char *seq = lib->seqBuffer;
+
+ /* check if we are at the end of the library */
+ if (lib->size == 0) {
+ *length = 0;
+ return NULL;
+ }
+
+ if (lib->pos == lib->size) {
+ readNextBlock (lib);
+ }
+
+ inx = lib->pos;
+
+ /* check for the start of a sequence */
+ if (lib->readBuffer[inx] != '>') {
+ fprintf (stderr, "Error parsing fasta file expecting > found %c\n",
+ lib->readBuffer[inx]);
+ exit (-1);
+ }
+
+ ++inx;
+
+ /* read in the sequence name */
+ len = 0;
+ done = 0;
+ do {
+ if (inx >= lib->size) {
+ size = readNextBlock (lib);
+ if (size == 0) {
+ *length = 0;
+ return NULL;
+ }
+ inx = lib->pos;
+ } else if (lib->readBuffer[inx] == '\n') {
+ *name = '\0';
+ done = 1;
+ } else if (len < SEQ_NAME_SIZE - 1) {
+ *name++ = lib->readBuffer[inx];
+ len++;
+ }
+ ++inx;
+ } while (!done);
+
+ lib->pos = inx;
+
+ /* read in the sequence */
+ len = 0;
+ done = 0;
+ do {
+ if (inx >= lib->size) {
+ size = readNextBlock (lib);
+ if (size == 0) {
+ *seq = '\0';
+ done = 1;
+ }
+ inx = 0;
+ } else if (isspace(lib->readBuffer[inx])) {
+ ++inx;
+ } else if (lib->readBuffer[inx] == '>') {
+ *seq = '\0';
+ done = 1;
+ } else if (len >= MAX_SEQ_LENGTH) {
+ fprintf (stderr, "Sequence %s exceeds maximum length\n",
+ lib->seqName);
+ exit (-1);
+ } else {
+ int value = AMINO_ACID_VALUE[lib->readBuffer[inx]];
+ if (value == -1) {
+ fprintf (stderr, "Unknown amino acid %c in sequence %s\n",
+ lib->readBuffer[inx], lib->seqName);
+ exit (-1);
+ }
+ *seq++ = (char) value;
+ inx++;
+ len++;
+ }
+ } while (!done);
+
+ lib->pos = inx;
+ *length = len;
+
+ lib->sequences++;
+ lib->residues += len;
+
+ /* check if we need to pad the sequence to a multiple of 16 */
+ if (lib->pad) {
+ inx = 16 - (len % 16);
+ while (inx--) {
+ *seq++ = ALPHA_SIZE;
+ }
+ *seq = '\0';
+ }
+
+ return lib->seqBuffer;
+}
+
+void closeLib (FASTA_LIB *lib)
+{
+ fclose (lib->fp);
+
+ free (lib->readBuffer);
+ free (lib->seqBuffer);
+ free (lib->seqName);
+
+ free (lib);
+}
--- /dev/null
+/******************************************************************
+ Copyright 2006 by Michael Farrar. All rights reserved.
+ This program may not be sold or incorporated into a commercial product,
+ in whole or in part, without written consent of Michael Farrar. For
+ further information regarding permission for use or reproduction, please
+ contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+
+/*
+ Written by Michael Farrar, 2006.
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+
+#ifndef INCLUDE_FASTALIB_H
+#define INCLUDE_FASTALIB_H
+
+#include <stdio.h>
+
+#define MAX_SEQ_LENGTH (64 * 1024)
+
+typedef struct {
+ char *readBuffer;
+
+ char *seqName;
+ unsigned char *seqBuffer;
+
+ int pos;
+ int size;
+
+ FILE *fp;
+
+ int sequences;
+ int residues;
+
+ int pad;
+} FASTA_LIB;
+
+FASTA_LIB *openLib (char *file, int pad);
+void closeLib (FASTA_LIB *lib);
+
+unsigned char *nextSeq (FASTA_LIB *lib, int *length);
+
+#define seqName(LIB) (LIB->seqName)
+
+#endif /* INCLUDE_FASTALIB_H */
--- /dev/null
+/******************************************************************
+ Copyright 2006 by Michael Farrar. All rights reserved.
+ This program may not be sold or incorporated into a commercial product,
+ in whole or in part, without written consent of Michael Farrar. For
+ further information regarding permission for use or reproduction, please
+ contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+
+/*
+ Written by Michael Farrar, 2006.
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "swsse2.h"
+#include "matrix.h"
+
+#define BUF_SIZE 512
+
+char *skipSpaces (char *line)
+{
+ while (isspace (*line)) {
+ ++line;
+ }
+
+ return line;
+}
+
+signed char *readMatrix (char *file)
+{
+ FILE *fp;
+ char line[BUF_SIZE];
+
+ signed char *matrix;
+
+ int mark[ALPHA_SIZE];
+ int order[ALPHA_SIZE];
+
+ int done;
+ int i;
+
+ int errors = 0;
+ int count = 0;
+
+ if ((fp = fopen (file, "r")) == NULL) {
+ fprintf (stderr, "Unable to open file %s\n", file);
+ exit (-1);
+ }
+
+ matrix = (signed char *) malloc (ALPHA_SIZE * ALPHA_SIZE);
+ if (!matrix) {
+ fprintf (stderr, "Unable to allocate memory for scoring matrix\n");
+ exit (-1);
+ }
+
+ /* initialize the order and mark arrays */
+ for (i = 0; i < ALPHA_SIZE; ++i) {
+ order[i] = -1;
+ mark[i] = -1;
+ }
+
+ /* read the first line of the matrix giving the amino acid order */
+ done = 0;
+ while (!done && fgets (line, BUF_SIZE, fp) != NULL) {
+ char *ptr = skipSpaces (line);
+ if (*ptr && *ptr != '#') {
+
+ while (*ptr && *ptr != '#') {
+ int inx = AMINO_ACID_VALUE[*ptr];
+
+ if (inx == -1) {
+ fprintf (stderr, "Unknown amino acid %c in %s\n", *ptr, file);
+ ++errors;
+ } else if (mark[inx] != -1) {
+ fprintf (stderr, "Amino acid %c defined twice\n", *ptr);
+ ++errors;
+ } else if (count >= ALPHA_SIZE) {
+ /* this should not happen, but we will be safe */
+ fprintf (stderr, "Too many amino acids %d\n", count);
+ ++errors;
+ } else {
+ order[count++] = inx;
+ mark[inx] = inx;
+ }
+ ptr = skipSpaces (ptr + 1);
+ }
+
+ done = 1;
+ }
+ }
+
+ /* make sure all amino acids are defined */
+ for (i = 0; i < ALPHA_SIZE; ++i) {
+ if (order[i] < 0) {
+ fprintf (stderr, "Missing column for amino acid %c\n",
+ AMINO_ACIDS[i]);
+ ++errors;
+ }
+ mark[i] = -1;
+ }
+
+ if (errors > 0) {
+ fprintf (stderr, "Terminating due to errors in matrix file\n");
+ exit (-1);
+ }
+
+ /* read the scores for the amino acids */
+ while (fgets (line, BUF_SIZE, fp) != NULL) {
+ signed char *row;
+ char *ptr = skipSpaces (line);
+ if (*ptr && *ptr != '#') {
+ char aminoAcid = *ptr;
+ int inx = AMINO_ACID_VALUE[*ptr];
+ if (inx == -1) {
+ fprintf (stderr, "Unknown amino acid %c in matrix\n", *ptr);
+ ++errors;
+ } else if (mark[inx] != -1) {
+ fprintf (stderr, "Row %c defined twice\n", *ptr);
+ ++errors;
+ }
+
+ row = &matrix[inx * ALPHA_SIZE];
+
+ for (i = 0; i < ALPHA_SIZE; ++i) {
+ int sign = 1;
+ int num = 0;
+
+ ptr = skipSpaces (ptr + 1);
+
+ /* check the sign */
+ if (*ptr == '-') {
+ sign = -1;
+ ++ptr;
+ }
+
+ do {
+ if (*ptr >= '0' && *ptr <= '9') {
+ num = num * 10 + (*ptr - '0');
+ ptr++;
+ } else {
+ char name[16];
+ char *pName;
+ if (isspace (*ptr)) {
+ pName = "space";
+ } else if (*ptr == 0) {
+ pName = "end of line";
+ } else {
+ name[0] = *ptr;
+ name[1] = 0;
+ pName = name;
+ }
+ fprintf (stderr, "Row %c Expecting digit found %s\n",
+ aminoAcid, pName);
+ exit (-1);
+ }
+ } while (*ptr && !isspace (*ptr));
+
+ num = num * sign;
+
+ if (num < -128 || num > 127) {
+ fprintf (stderr, "Weight %d out of range row %c\n",
+ aminoAcid, num);
+ ++errors;
+ num = 0;
+ }
+
+ row[order[i]] = (char) num;
+ }
+
+ if (i < ALPHA_SIZE) {
+ fprintf (stderr, "Amino acid row %c incomplete\n", aminoAcid);
+ ++errors;
+ }
+
+ mark[inx] = 1;
+ }
+ }
+
+ /* make sure all amino acids are defined */
+ for (i = 0; i < ALPHA_SIZE; ++i) {
+ if (mark[i] < 0) {
+ fprintf (stderr, "Missing row for amino acid %c\n",
+ AMINO_ACIDS[i]);
+ ++errors;
+ }
+ }
+
+ if (errors) {
+ fprintf (stderr, "Terminating due to errors in matrix %s\n", file);
+ }
+
+ fclose (fp);
+
+ return matrix;
+}
--- /dev/null
+/******************************************************************
+ Copyright 2006 by Michael Farrar. All rights reserved.
+ This program may not be sold or incorporated into a commercial product,
+ in whole or in part, without written consent of Michael Farrar. For
+ further information regarding permission for use or reproduction, please
+ contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+
+/*
+ Written by Michael Farrar, 2006.
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+
+#ifndef INCLUDE_MATRIX_H
+#define INCLUDE_MATRIX_H
+
+signed char *readMatrix (char *file);
+
+#endif /* INCLUDE_MATRIX_H */
--- /dev/null
+/******************************************************************\r
+ Copyright 2006 by Michael Farrar. All rights reserved.\r
+ This program may not be sold or incorporated into a commercial product,\r
+ in whole or in part, without written consent of Michael Farrar. For \r
+ further information regarding permission for use or reproduction, please \r
+ contact: Michael Farrar at farrar.michael@gmail.com.\r
+*******************************************************************/\r
+\r
+/*\r
+ Written by Michael Farrar, 2006.\r
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
+*/\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+\r
+#include "swsse2.h"\r
+#include "swscalar.h"\r
+\r
+typedef struct {\r
+ int *pMatrix;\r
+ int *pH;\r
+ int *pE;\r
+ int *pData;\r
+} SwScalarData;\r
+\r
+int\r
+swScalar (unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ int *pMatrix,\r
+ int *pH,\r
+ int *pE);\r
+\r
+void *\r
+swScalarInit(unsigned char *querySeq,\r
+ int queryLength,\r
+ signed char *matrix)\r
+{\r
+ int i, j;\r
+ int nCount;\r
+\r
+ int *pi;\r
+\r
+ SwScalarData *pSwData;\r
+ \r
+ /* remove unreferenced warnings */\r
+ querySeq;\r
+\r
+ pSwData = (SwScalarData *) malloc (sizeof (SwScalarData));\r
+ if (!pSwData) {\r
+ fprintf (stderr, "Unable to allocate memory for SW data\n");\r
+ exit (-1);\r
+ }\r
+\r
+ nCount = ALPHA_SIZE * ALPHA_SIZE + /* matrix */\r
+ (queryLength * 3); /* vH1, vH2 and vE */\r
+\r
+ pSwData->pData = (int *) calloc (nCount, sizeof (int));\r
+ if (!pSwData->pData) {\r
+ fprintf (stderr, "Unable to allocate memory for SW data buffers\n");\r
+ exit (-1);\r
+ }\r
+\r
+ pSwData->pMatrix = pSwData->pData;\r
+\r
+ pSwData->pH = pSwData->pMatrix + ALPHA_SIZE * ALPHA_SIZE;\r
+ pSwData->pE = pSwData->pH + queryLength;\r
+\r
+ /* Conver the scoring matrix to ints */\r
+ pi = pSwData->pMatrix;\r
+ for (i = 0; i < ALPHA_SIZE; ++i) {\r
+ for (j = 0; j < ALPHA_SIZE; ++j) {\r
+ *pi++ = (int) *matrix++;\r
+ }\r
+ }\r
+\r
+ return pSwData;\r
+}\r
+\r
+void swScalarScan (unsigned char *querySeq,\r
+ int queryLength,\r
+ FASTA_LIB *dbLib,\r
+ void *swData,\r
+ SEARCH_OPTIONS *options,\r
+ SCORE_LIST *scores)\r
+{\r
+ int score;\r
+\r
+ int threshold = options->threshold;\r
+\r
+ unsigned char *dbSeq;\r
+ int dbLen;\r
+\r
+ int gapInit = -(options->gapInit + options->gapExt);\r
+ int gapExt = -options->gapExt;\r
+\r
+ SwScalarData *scalarData = (SwScalarData *) swData;\r
+\r
+ dbSeq = nextSeq (dbLib, &dbLen);\r
+ while (dbLen > 0) {\r
+\r
+ score = swScalar (querySeq, queryLength, \r
+ dbSeq, dbLen, \r
+ gapInit, gapExt, \r
+ scalarData->pMatrix,\r
+ scalarData->pH,\r
+ scalarData->pE);\r
+\r
+ if (score >= threshold) {\r
+ int minScore = insertList (scores, score, seqName (dbLib));\r
+ if (minScore >= threshold) {\r
+ threshold = minScore;\r
+ }\r
+ }\r
+\r
+ dbSeq = nextSeq (dbLib, &dbLen);\r
+ }\r
+}\r
+\r
+void\r
+swScalarComplete(void *pSwData)\r
+{\r
+ SwScalarData *pScalarData = (SwScalarData *) pSwData;\r
+\r
+ free (pScalarData->pData);\r
+ free (pScalarData);\r
+}\r
+\r
+int\r
+swScalar(unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ int *pMatrix,\r
+ int *pH,\r
+ int *pE)\r
+{\r
+ int i, j;\r
+\r
+ int E, F, H;\r
+ int prevH;\r
+\r
+ int maxScore = 0;\r
+\r
+ int *pScore;\r
+\r
+ /* Zero out the storage vector */\r
+ for (i = 0; i < queryLength; i++)\r
+ {\r
+ *(pE + i) = 0;\r
+ *(pH + i) = 0;\r
+ }\r
+\r
+ for (i = 0; i < dbLength; ++i)\r
+ {\r
+ /* fetch first data asap. */\r
+ pScore = pMatrix + dbSeq[i] * ALPHA_SIZE;\r
+\r
+ /* zero out F. */\r
+ F = 0;\r
+\r
+ /* load the next h value */\r
+ prevH = 0;\r
+\r
+ for (j = 0; j < queryLength; j++)\r
+ {\r
+ /* load E values */\r
+ E = *(pE + j);\r
+\r
+ /* add score to H */\r
+ H = prevH + *(pScore + querySeq[j]);\r
+ if (H < 0) {\r
+ H = 0;\r
+ }\r
+\r
+ /* Update highest score encountered this far */\r
+ if (H > maxScore) {\r
+ maxScore = H;\r
+ }\r
+\r
+ /* get max from H, E and F */\r
+ if (E > H) {\r
+ H = E;\r
+ }\r
+ if (F > H) {\r
+ H = F;\r
+ }\r
+\r
+ /* save H values */\r
+ prevH = *(pH + j);\r
+ *(pH + j) = H;\r
+\r
+ H = H - gapOpen;\r
+\r
+ /* update E value */\r
+ E = E - gapExtend;\r
+ if (H > E) {\r
+ E = H;\r
+ }\r
+ if (E < 0) {\r
+ E = 0;\r
+ }\r
+\r
+ /* update vF value */\r
+ F = F - gapExtend;\r
+ if (H > F) {\r
+ F = H;\r
+ }\r
+ if (F < 0) {\r
+ F = 0;\r
+ }\r
+\r
+ /* save vE values */\r
+ *(pE + j) = E;\r
+ }\r
+ }\r
+\r
+ /* return largest score */\r
+ return maxScore;\r
+}\r
--- /dev/null
+/******************************************************************
+ Copyright 2006 by Michael Farrar. All rights reserved.
+ This program may not be sold or incorporated into a commercial product,
+ in whole or in part, without written consent of Michael Farrar. For
+ further information regarding permission for use or reproduction, please
+ contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+
+/*
+ Written by Michael Farrar, 2006.
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+
+#ifndef INCLUDE_SWSCALAR_H
+#define INCLUDE_SWSCALAR_H
+
+#include "swsse2.h"
+#include "fastalib.h"
+
+SW_DATA *
+swScalarInit (unsigned char *querySeq,
+ int queryLength,
+ signed char *matrix);
+
+
+void
+swScalarScan (unsigned char *querySeq,
+ int queryLength,
+ FASTA_LIB *dbLib,
+ void *swData,
+ SEARCH_OPTIONS *options,
+ SCORE_LIST *scores);
+
+
+void
+swScalarComplete (SW_DATA *pSwData);
+
+
+#endif /* INCLUDE_SWSCALAR_H */
--- /dev/null
+/******************************************************************\r
+ Copyright 2006 by Michael Farrar. All rights reserved.\r
+ This program may not be sold or incorporated into a commercial product,\r
+ in whole or in part, without written consent of Michael Farrar. For \r
+ further information regarding permission for use or reproduction, please \r
+ contact: Michael Farrar at farrar.michael@gmail.com.\r
+*******************************************************************/\r
+\r
+/*\r
+ Written by Michael Farrar, 2006.\r
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
+*/\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+\r
+#include <time.h>\r
+#include <sys/timeb.h>\r
+\r
+#include "swsse2.h"\r
+#include "matrix.h"\r
+#include "fastalib.h"\r
+\r
+#include "swscalar.h"\r
+#include "swwozniak.h"\r
+#ifdef WITH_ROGNES\r
+#include "swrognes.h"\r
+#endif\r
+#include "swstriped.h"\r
+\r
+typedef enum { SCALAR, \r
+ WOZNIAK, \r
+#ifdef WITH_ROGNES\r
+ ROGNES, \r
+#endif\r
+ STRIPED \r
+} SW_TYPES;\r
+\r
+const char *SW_IMPLEMENATION[] = {\r
+ "Non-optimized",\r
+ "Wozniak",\r
+#ifdef WITH_ROGNES\r
+ "Rognes",\r
+#endif\r
+ "Striped",\r
+};\r
+\r
+typedef struct {\r
+ SW_DATA *(*init) (unsigned char *querySeq,\r
+ int queryLength,\r
+ signed char *matrix);\r
+ void (*scan) (unsigned char *querySeq,\r
+ int queryLength,\r
+ FASTA_LIB *dbLib,\r
+ void *swData,\r
+ SEARCH_OPTIONS *options,\r
+ SCORE_LIST *scores);\r
+ void (*done) (SW_DATA *pSwData);\r
+} SW_FUNCT_DEFS;\r
+\r
+SW_FUNCT_DEFS swFuncs[] = {\r
+ {\r
+ swScalarInit,\r
+ swScalarScan,\r
+ swScalarComplete,\r
+ },\r
+ {\r
+ swWozniakInit,\r
+ swWozniakScan,\r
+ swWozniakComplete,\r
+ },\r
+#ifdef WITH_ROGNES\r
+ {\r
+ swRognesInit,\r
+ swRognesScan,\r
+ swRognesComplete,\r
+ },\r
+#endif\r
+ {\r
+ swStripedInit,\r
+ swStripedScan,\r
+ swStripedComplete,\r
+ },\r
+};\r
+\r
+const char AMINO_ACIDS[ALPHA_SIZE] = {\r
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', \r
+ 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', \r
+ 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'\r
+};\r
+\r
+const int AMINO_ACID_VALUE[256] = {\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1,\r
+ 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,\r
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1,\r
+ 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1\r
+};\r
+\r
+SCORE_LIST *initList (int count);\r
+void freeList (SCORE_LIST *list);\r
+\r
+void printResults (SCORE_LIST *list);\r
+\r
+void printUsage (void)\r
+{\r
+ printf ("Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] "\r
+ "[-c num] matrix query db\n");\r
+ printf (" -h : this help message\n");\r
+ printf (" -n : run a non-vectorized Smith-Waterman search\n");\r
+ printf (" -w : run a vectorized Wozniak search\n");\r
+#ifdef WITH_ROGNES\r
+ printf (" -r : run a vectorized Rognes search\n");\r
+#else\r
+ printf (" -r : run a vectorized Rognes search (NOT SUPPORTED)\n");\r
+#endif\r
+ printf (" -s : run a vectorized striped search (default)\n");\r
+ printf (" -i num : gap init penalty (default -10)\n");\r
+ printf (" -e num : gap extension penalty (default -2)\n");\r
+ printf (" -t num : minimum score threshold (default 20)\n");\r
+ printf (" -c num : number of scores to be displayed (default 250)\n");\r
+ printf (" matrix : scoring matrix file\n");\r
+ printf (" query : query sequence file (fasta format)\n");\r
+ printf (" db : sequence database file (fasta format)\n");\r
+}\r
+\r
+#if 0\r
+int main (int argc, char **argv)\r
+{\r
+ int i;\r
+\r
+ int penalty;\r
+\r
+ int rptCount = 250;\r
+\r
+ SW_TYPES swType = STRIPED;\r
+\r
+ char *dbFile = NULL;\r
+ char *queryFile = NULL;\r
+ char *matrixFile = NULL;\r
+\r
+ signed char *matrix;\r
+\r
+ unsigned char *querySeq;\r
+ int queryLen;\r
+\r
+ SCORE_LIST *list;\r
+\r
+ FASTA_LIB *queryLib;\r
+ FASTA_LIB *dbLib;\r
+\r
+ void *swData;\r
+\r
+ struct _timeb startTime;\r
+ struct _timeb endTime;\r
+\r
+ double startTimeSec;\r
+ double endTimeSec;\r
+\r
+ SEARCH_OPTIONS options;\r
+\r
+ if (argc < 4) {\r
+ printUsage ();\r
+ exit (-1);\r
+ }\r
+\r
+ /* set the default options */\r
+ options.gapInit = -10;\r
+ options.gapExt = -2;\r
+ options.threshold = 20;\r
+\r
+ i = 1;\r
+ while (i < argc) {\r
+ if (i + 3 == argc) {\r
+ /* should be matrix file name */\r
+ matrixFile = argv[i];\r
+\r
+ } else if (i + 2 == argc) {\r
+ /* should be query file name */\r
+ queryFile = argv[i];\r
+\r
+ } else if (i + 1 == argc) {\r
+ /* should be matrix file name */\r
+ dbFile = argv[i];\r
+\r
+ } else {\r
+ /* process arguements */\r
+ switch (argv[i][1]) {\r
+ case 'h':\r
+ printUsage ();\r
+ break;\r
+ case 'n':\r
+ swType = SCALAR;\r
+ break;\r
+ case 'r':\r
+#ifdef WITH_ROGNES\r
+ swType = ROGNES;\r
+#else\r
+ fprintf (stderr, "The ROGNES implementation is not supported\n");\r
+ exit (-1);\r
+#endif\r
+ break;\r
+ case 'w':\r
+ swType = WOZNIAK;\r
+ break;\r
+ case 's':\r
+ swType = STRIPED;\r
+ break;\r
+ case 'i':\r
+ penalty = atoi (argv[++i]);\r
+ if (penalty > 0 || penalty < -128) {\r
+ fprintf (stderr, "Invalid gap init %d\n", penalty);\r
+ fprintf (stderr, "Valid range is 0 - -128\n");\r
+ exit (-1);\r
+ }\r
+ options.gapInit = (unsigned short) penalty;\r
+ break;\r
+ case 'e':\r
+ penalty = atoi (argv[++i]);\r
+ if (penalty > 0 || penalty < -128) {\r
+ fprintf (stderr, "Invalid gap extension %d\n", penalty);\r
+ fprintf (stderr, "Valid range is 0 - -128\n");\r
+ exit (-1);\r
+ }\r
+ options.gapExt = (unsigned short) penalty;\r
+ break;\r
+ case 't':\r
+ options.threshold = atoi (argv[++i]);\r
+ break;\r
+ case 'c':\r
+ rptCount = atoi (argv[++i]);\r
+ if (rptCount < 10) {\r
+ rptCount = 10;\r
+ }\r
+ break;\r
+ default:\r
+ fprintf (stderr, "Invalid option %s\n", argv[i]);\r
+ printUsage ();\r
+ exit (-1);\r
+ }\r
+ }\r
+ ++i;\r
+ }\r
+\r
+ list = initList (rptCount);\r
+\r
+ matrix = readMatrix (matrixFile);\r
+ if (matrix == NULL) {\r
+ fprintf (stderr, "Error reading matrix\n");\r
+ exit (-1);\r
+ }\r
+\r
+ dbLib = openLib (dbFile, swType == WOZNIAK);\r
+ queryLib = openLib (queryFile, 0);\r
+\r
+ querySeq = nextSeq (queryLib, &queryLen);\r
+ if (queryLen == 0) {\r
+ fprintf (stderr, "Empty query sequence\n");\r
+ exit (-1);\r
+ }\r
+\r
+ printf ("%s vs %s\n", queryFile, dbFile);\r
+ printf ("Matrix: %s, Init: %d, Ext: %d\n\n", \r
+ matrixFile, options.gapInit, options.gapExt);\r
+\r
+ _ftime(&startTime);\r
+\r
+ swData = (swFuncs[swType].init) (querySeq, queryLen, matrix);\r
+\r
+ (swFuncs[swType].scan) (querySeq, queryLen, dbLib, swData, &options, list);\r
+\r
+ (swFuncs[swType].done) (swData);\r
+\r
+ _ftime(&endTime);\r
+\r
+ printResults (list);\r
+\r
+ printf ("\n");\r
+ printf ("%d residues in query string\n", queryLen);\r
+ printf ("%d residues in %d library sequences\n", \r
+ dbLib->residues, dbLib->sequences);\r
+\r
+ startTimeSec = startTime.time + startTime.millitm / 1000.0;\r
+ endTimeSec = endTime.time + endTime.millitm / 1000.0;\r
+ printf ("Scan time: %6.3f (%s implementation)\n", \r
+ endTimeSec - startTimeSec,\r
+ SW_IMPLEMENATION[swType]);\r
+ \r
+ closeLib (queryLib);\r
+ closeLib (dbLib);\r
+\r
+ free (matrix);\r
+\r
+ freeList (list);\r
+}\r
+#endif\r
+\r
+SCORE_LIST *\r
+initList (int count)\r
+{\r
+ int i;\r
+\r
+ SCORE_LIST *hdr;\r
+ SCORE_NODE *list;\r
+ SCORE_NODE *prev;\r
+\r
+ hdr = (SCORE_LIST *) malloc (sizeof (SCORE_LIST));\r
+ if (hdr == NULL) {\r
+ fprintf (stderr, "Cannot allocate storage for score header\n");\r
+ exit (-1);\r
+ }\r
+\r
+ list = (SCORE_NODE *) calloc (count, sizeof (SCORE_NODE));\r
+ if (list == NULL) {\r
+ fprintf (stderr, "Cannot allocate storage for scores\n");\r
+ exit (-1);\r
+ }\r
+\r
+ /* initialize the scores list */\r
+ hdr->minScore = 0;\r
+ hdr->first = NULL;\r
+ hdr->last = NULL;\r
+ hdr->free = list;\r
+ hdr->buffer = list;\r
+\r
+ prev = NULL;\r
+ for (i = 0; i < count; ++i) {\r
+ list[i].name[0] = '\0';\r
+ list[i].score = 0;\r
+\r
+ if (i == 0) {\r
+ list[i].prev = NULL;\r
+ } else {\r
+ list[i].prev = &list[i-1];\r
+ }\r
+\r
+ if (i == count - 1) {\r
+ list[i].next = NULL;\r
+ } else {\r
+ list[i].next = &list[i+1];\r
+ }\r
+ }\r
+\r
+ return hdr;\r
+}\r
+\r
+void freeList (SCORE_LIST *list)\r
+{\r
+ free (list->buffer);\r
+ free (list);\r
+}\r
+\r
+int insertList (SCORE_LIST *list, int score, char *name)\r
+{\r
+ SCORE_NODE *node;\r
+ SCORE_NODE *ptr = list->first;\r
+\r
+ if (list->free != NULL) {\r
+ node = list->free;\r
+ list->free = list->free->next;\r
+ } else if (score > list->last->score) {\r
+ node = list->last;\r
+ list->last = node->prev;\r
+ list->last->next = NULL;\r
+ } else {\r
+ /* should never happen */\r
+ return list->minScore + 1;\r
+ }\r
+\r
+ strncpy (node->name, name, MAX_SCORE_NAME);\r
+ node->name[MAX_SCORE_NAME - 1] = '\0';\r
+ node->score = score;\r
+\r
+ while (ptr && ptr->score >= score) {\r
+ ptr = ptr->next;\r
+ }\r
+\r
+ if (list->first == NULL) {\r
+ list->first = node;\r
+ list->last = node;\r
+ node->prev = NULL;\r
+ node->next = NULL;\r
+ } else if (ptr == NULL) {\r
+ node->prev = list->last;\r
+ node->next = NULL;\r
+ node->prev->next = node;\r
+ list->last = node;\r
+ } else {\r
+ node->prev = ptr->prev;\r
+ node->next = ptr;\r
+\r
+ if (node->prev == NULL) {\r
+ list->first = node;\r
+ } else {\r
+ node->prev->next = node;\r
+ }\r
+ ptr->prev = node;\r
+ }\r
+\r
+ if (list->free == NULL) {\r
+ list->minScore = list->last->score + 1;\r
+ }\r
+\r
+ return list->minScore;\r
+}\r
+\r
+void printResults (SCORE_LIST *list)\r
+{\r
+ SCORE_NODE *ptr = list->first;\r
+\r
+ printf ("Score Description\n");\r
+\r
+ while (ptr) {\r
+ printf ("%5d %s\n", ptr->score, ptr->name);\r
+ ptr = ptr->next;\r
+ }\r
+}\r
+\r
--- /dev/null
+/******************************************************************\r
+ Copyright 2006 by Michael Farrar. All rights reserved.\r
+ This program may not be sold or incorporated into a commercial product,\r
+ in whole or in part, without written consent of Michael Farrar. For \r
+ further information regarding permission for use or reproduction, please \r
+ contact: Michael Farrar at farrar.michael@gmail.com.\r
+*******************************************************************/\r
+\r
+/*\r
+ Written by Michael Farrar, 2006.\r
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
+*/\r
+\r
+#ifndef INCLUDE_SWSSE2_H\r
+#define INCLUDE_SWSSE2_H\r
+\r
+typedef void SW_DATA;\r
+\r
+#define ALPHA_SIZE 23\r
+\r
+extern const char AMINO_ACIDS[ALPHA_SIZE];\r
+extern const int AMINO_ACID_VALUE[256];\r
+\r
+#define SHORT_BIAS 32768\r
+\r
+typedef struct {\r
+ short gapInit;\r
+ short gapExt;\r
+ int threshold;\r
+} SEARCH_OPTIONS;\r
+\r
+#define MAX_SCORE_NAME 64\r
+typedef struct SCORE_STRUCT {\r
+ int score;\r
+ char name[MAX_SCORE_NAME];\r
+ struct SCORE_STRUCT *prev;\r
+ struct SCORE_STRUCT *next;\r
+} SCORE_NODE;\r
+\r
+typedef struct {\r
+ int minScore;\r
+ SCORE_NODE *first;\r
+ SCORE_NODE *last;\r
+ SCORE_NODE *free;\r
+ void *buffer;\r
+} SCORE_LIST;\r
+\r
+int insertList (SCORE_LIST *list, int score, char *name);\r
+\r
+#endif /* INCLUDE_SWSSE2_H */\r
--- /dev/null
+/******************************************************************\r
+ Copyright 2006 by Michael Farrar. All rights reserved.\r
+ This program may not be sold or incorporated into a commercial product,\r
+ in whole or in part, without written consent of Michael Farrar. For \r
+ further information regarding permission for use or reproduction, please \r
+ contact: Michael Farrar at farrar.michael@gmail.com.\r
+*******************************************************************/\r
+\r
+/*\r
+ Written by Michael Farrar, 2006.\r
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
+*/\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <emmintrin.h>\r
+\r
+#include "swsse2.h"\r
+#include "swstriped.h"\r
+\r
+void *\r
+swStripedInit(unsigned char *querySeq,\r
+ int queryLength,\r
+ signed char *matrix)\r
+{\r
+ int i, j, k;\r
+\r
+ int segSize;\r
+ int nCount;\r
+\r
+ int bias;\r
+\r
+ int lenQryByte;\r
+ int lenQryShort;\r
+\r
+ int weight;\r
+\r
+ short *ps;\r
+ char *pc;\r
+\r
+ signed char *matrixRow;\r
+\r
+ size_t aligned;\r
+\r
+ SwStripedData *pSwData;\r
+ \r
+ lenQryByte = (queryLength + 15) / 16;\r
+ lenQryShort = (queryLength + 7) / 8;\r
+\r
+ pSwData = (SwStripedData *) malloc (sizeof (SwStripedData));\r
+ if (!pSwData) {\r
+ fprintf (stderr, "Unable to allocate memory for SW data\n");\r
+ exit (-1);\r
+ }\r
+\r
+ nCount = 64 + /* slack bytes */\r
+ lenQryByte * ALPHA_SIZE + /* query profile byte */\r
+ lenQryShort * ALPHA_SIZE + /* query profile short */\r
+ (lenQryShort * 3); /* vH1, vH2 and vE */\r
+\r
+ pSwData->pData = (unsigned char *) calloc (nCount, sizeof (__m128i));\r
+ if (!pSwData->pData) {\r
+ fprintf (stderr, "Unable to allocate memory for SW data buffers\n");\r
+ exit (-1);\r
+ }\r
+\r
+ /* since we might port this to another platform, lets align the data */\r
+ /* to 16 byte boundries ourselves */\r
+ aligned = ((size_t) pSwData->pData + 15) & ~(0x0f);\r
+\r
+ pSwData->pvbQueryProf = (__m128i *) aligned;\r
+ pSwData->pvsQueryProf = pSwData->pvbQueryProf + lenQryByte * ALPHA_SIZE;\r
+\r
+ pSwData->pvH1 = pSwData->pvsQueryProf + lenQryShort * ALPHA_SIZE;\r
+ pSwData->pvH2 = pSwData->pvH1 + lenQryShort;\r
+ pSwData->pvE = pSwData->pvH2 + lenQryShort;\r
+\r
+ /* Use a scoring profile for the SSE2 implementation, but the layout\r
+ * is a bit strange. The scoring profile is parallel to the query, but is\r
+ * accessed in a stripped pattern. The query is divided into equal length\r
+ * segments. The number of segments is equal to the number of elements\r
+ * processed in the SSE2 register. For 8-bit calculations, the query will\r
+ * be divided into 16 equal length parts. If the query is not long enough\r
+ * to fill the last segment, it will be filled with neutral weights. The\r
+ * first element in the SSE register will hold a value from the first segment,\r
+ * the second element of the SSE register will hold a value from the\r
+ * second segment and so on. So if the query length is 288, then each\r
+ * segment will have a length of 18. So the first 16 bytes will have\r
+ * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will\r
+ * have the following weights: Q2, Q20, Q38, ... Q272; and so on until\r
+ * all parts of all segments have been written. The last seqment will\r
+ * have the following weights: Q18, Q36, Q54, ... Q288. This will be\r
+ * done for the entire alphabet.\r
+ */\r
+\r
+ /* Find the bias to use in the substitution matrix */\r
+ bias = 127;\r
+ for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) {\r
+ if (matrix[i] < bias) {\r
+ bias = matrix[i];\r
+ }\r
+ }\r
+ if (bias > 0) {\r
+ bias = 0;\r
+ }\r
+\r
+ /* Fill in the byte query profile */\r
+ pc = (char *) pSwData->pvbQueryProf;\r
+ segSize = (queryLength + 15) / 16;\r
+ nCount = segSize * 16;\r
+ for (i = 0; i < ALPHA_SIZE; ++i) {\r
+ matrixRow = matrix + i * ALPHA_SIZE;\r
+ for (j = 0; j < segSize; ++j) {\r
+ for (k = j; k < nCount; k += segSize) {\r
+ if (k >= queryLength) {\r
+ weight = 0;\r
+ } else {\r
+ weight = matrixRow[*(querySeq + k)];\r
+ }\r
+ *pc++ = (char) (weight - bias);\r
+ }\r
+ }\r
+ }\r
+\r
+ /* Fill in the short query profile */\r
+ ps = (short *) pSwData->pvsQueryProf;\r
+ segSize = (queryLength + 7) / 8;\r
+ nCount = segSize * 8;\r
+ for (i = 0; i < ALPHA_SIZE; ++i) {\r
+ matrixRow = matrix + i * ALPHA_SIZE;\r
+ for (j = 0; j < segSize; ++j) {\r
+ for (k = j; k < nCount; k += segSize) {\r
+ if (k >= queryLength) {\r
+ weight = 0;\r
+ } else {\r
+ weight = matrixRow[*(querySeq + k)];\r
+ }\r
+ *ps++ = (unsigned short) weight;\r
+ }\r
+ }\r
+ }\r
+\r
+ pSwData->bias = (unsigned short) -bias;\r
+\r
+ return pSwData;\r
+}\r
+\r
+void swStripedScan (unsigned char *querySeq,\r
+ int queryLength,\r
+ FASTA_LIB *dbLib,\r
+ void *swData,\r
+ SEARCH_OPTIONS *options,\r
+ SCORE_LIST *scores)\r
+{\r
+ int score;\r
+\r
+ int threshold = options->threshold;\r
+\r
+ unsigned char *dbSeq;\r
+ int dbLen;\r
+\r
+ int gapInit = -(options->gapInit + options->gapExt);\r
+ int gapExt = -options->gapExt;\r
+\r
+ SwStripedData *stripedData = (SwStripedData *) swData;\r
+\r
+ dbSeq = nextSeq (dbLib, &dbLen);\r
+ while (dbLen > 0) {\r
+\r
+ score = swStripedByte (querySeq, queryLength, \r
+ dbSeq, dbLen, \r
+ gapInit, gapExt, \r
+ stripedData->pvbQueryProf,\r
+ stripedData->pvH1,\r
+ stripedData->pvH2,\r
+ stripedData->pvE,\r
+ stripedData->bias);\r
+\r
+ /* check if needs a run with higher precision */\r
+ if (score >= 255) {\r
+ score = swStripedWord (querySeq, queryLength, \r
+ dbSeq, dbLen, \r
+ gapInit, gapExt, \r
+ stripedData->pvsQueryProf,\r
+ stripedData->pvH1,\r
+ stripedData->pvH2,\r
+ stripedData->pvE);\r
+ }\r
+\r
+ if (score >= threshold) {\r
+ int minScore = insertList (scores, score, seqName (dbLib));\r
+ if (minScore >= threshold) {\r
+ threshold = minScore;\r
+ }\r
+ }\r
+\r
+ dbSeq = nextSeq (dbLib, &dbLen);\r
+ }\r
+}\r
+\r
+void\r
+swStripedComplete(void *pSwData)\r
+{\r
+ SwStripedData *pStripedData = (SwStripedData *) pSwData;\r
+\r
+ free (pStripedData->pData);\r
+ free (pStripedData);\r
+}\r
+\r
+int\r
+swStripedWord(unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ __m128i *pvQueryProf,\r
+ __m128i *pvHLoad,\r
+ __m128i *pvHStore,\r
+ __m128i *pvE)\r
+{\r
+ int i, j;\r
+ int score;\r
+\r
+ int cmp;\r
+ int iter = (queryLength + 7) / 8;\r
+ \r
+ __m128i *pv;\r
+\r
+ __m128i vE, vF, vH;\r
+\r
+ __m128i vMaxScore;\r
+ __m128i vGapOpen;\r
+ __m128i vGapExtend;\r
+\r
+ __m128i vMin;\r
+ __m128i vMinimums;\r
+ __m128i vTemp;\r
+\r
+ __m128i *pvScore;\r
+\r
+ /* remove unreferenced warning */\r
+ querySeq;\r
+\r
+ /* Load gap opening penalty to all elements of a constant */\r
+ vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0);\r
+ vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
+ vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
+\r
+ /* Load gap extension penalty to all elements of a constant */\r
+ vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0);\r
+ vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
+ vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
+\r
+ /* load vMaxScore with the zeros. since we are using signed */\r
+ /* math, we will bias the maxscore to -32768 so we have the */\r
+ /* full range of the short. */\r
+ vMaxScore = _mm_cmpeq_epi16 (vMaxScore, vMaxScore);\r
+ vMaxScore = _mm_slli_epi16 (vMaxScore, 15);\r
+\r
+ vMinimums = _mm_shuffle_epi32 (vMaxScore, 0);\r
+\r
+ vMin = _mm_shuffle_epi32 (vMaxScore, 0);\r
+ vMin = _mm_srli_si128 (vMin, 14);\r
+\r
+ /* Zero out the storage vector */\r
+ for (i = 0; i < iter; i++)\r
+ {\r
+ _mm_store_si128 (pvE + i, vMaxScore);\r
+ _mm_store_si128 (pvHStore + i, vMaxScore);\r
+ }\r
+\r
+ for (i = 0; i < dbLength; ++i)\r
+ {\r
+ /* fetch first data asap. */\r
+ pvScore = pvQueryProf + dbSeq[i] * iter;\r
+\r
+ /* zero out F. */\r
+ vF = _mm_cmpeq_epi16 (vF, vF);\r
+ vF = _mm_slli_epi16 (vF, 15);\r
+\r
+ /* load the next h value */\r
+ vH = _mm_load_si128 (pvHStore + iter - 1);\r
+ vH = _mm_slli_si128 (vH, 2);\r
+ vH = _mm_or_si128 (vH, vMin);\r
+\r
+ pv = pvHLoad;\r
+ pvHLoad = pvHStore;\r
+ pvHStore = pv;\r
+\r
+ for (j = 0; j < iter; j++)\r
+ {\r
+ /* load values of vF and vH from previous row (one unit up) */\r
+ vE = _mm_load_si128 (pvE + j);\r
+\r
+ /* add score to vH */\r
+ vH = _mm_adds_epi16 (vH, *pvScore++);\r
+\r
+ /* Update highest score encountered this far */\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vH);\r
+\r
+ /* get max from vH, vE and vF */\r
+ vH = _mm_max_epi16 (vH, vE);\r
+ vH = _mm_max_epi16 (vH, vF);\r
+\r
+ /* save vH values */\r
+ _mm_store_si128 (pvHStore + j, vH);\r
+\r
+ /* update vE value */\r
+ vH = _mm_subs_epi16 (vH, vGapOpen);\r
+ vE = _mm_subs_epi16 (vE, vGapExtend);\r
+ vE = _mm_max_epi16 (vE, vH);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epi16 (vF, vGapExtend);\r
+ vF = _mm_max_epi16 (vF, vH);\r
+\r
+ /* save vE values */\r
+ _mm_store_si128 (pvE + j, vE);\r
+\r
+ /* load the next h value */\r
+ vH = _mm_load_si128 (pvHLoad + j);\r
+ }\r
+\r
+ /* reset pointers to the start of the saved data */\r
+ j = 0;\r
+ vH = _mm_load_si128 (pvHStore + j);\r
+\r
+ /* the computed vF value is for the given column. since */\r
+ /* we are at the end, we need to shift the vF value over */\r
+ /* to the next column. */\r
+ vF = _mm_slli_si128 (vF, 2);\r
+ vF = _mm_or_si128 (vF, vMin);\r
+ vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
+ vTemp = _mm_cmpgt_epi16 (vF, vTemp);\r
+ cmp = _mm_movemask_epi8 (vTemp);\r
+ while (cmp != 0x0000) \r
+ {\r
+ vE = _mm_load_si128 (pvE + j);\r
+\r
+ vH = _mm_max_epi16 (vH, vF);\r
+\r
+ /* save vH values */\r
+ _mm_store_si128 (pvHStore + j, vH);\r
+\r
+ /* update vE incase the new vH value would change it */\r
+ vH = _mm_subs_epi16 (vH, vGapOpen);\r
+ vE = _mm_max_epi16 (vE, vH);\r
+ _mm_store_si128 (pvE + j, vE);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epi16 (vF, vGapExtend);\r
+\r
+ j++;\r
+ if (j >= iter)\r
+ {\r
+ j = 0;\r
+ vF = _mm_slli_si128 (vF, 2);\r
+ vF = _mm_or_si128 (vF, vMin);\r
+ }\r
+\r
+ vH = _mm_load_si128 (pvHStore + j);\r
+\r
+ vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
+ vTemp = _mm_cmpgt_epi16 (vF, vTemp);\r
+ cmp = _mm_movemask_epi8 (vTemp);\r
+ }\r
+ }\r
+\r
+ /* find largest score in the vMaxScore vector */\r
+ vTemp = _mm_srli_si128 (vMaxScore, 8);\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 4);\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 2);\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
+\r
+ /* store in temporary variable */\r
+ score = (short) _mm_extract_epi16 (vMaxScore, 0);\r
+\r
+ /* return largest score */\r
+ return score + SHORT_BIAS;\r
+}\r
+\r
+\r
+\r
+\r
+int\r
+swStripedByte(unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ __m128i *pvQueryProf,\r
+ __m128i *pvHLoad,\r
+ __m128i *pvHStore,\r
+ __m128i *pvE,\r
+ unsigned short bias)\r
+{\r
+ int i, j;\r
+ int score;\r
+\r
+ int dup;\r
+ int cmp;\r
+ int iter = (queryLength + 15) / 16;\r
+ \r
+ __m128i *pv;\r
+\r
+ __m128i vE, vF, vH;\r
+\r
+ __m128i vMaxScore;\r
+ __m128i vBias;\r
+ __m128i vGapOpen;\r
+ __m128i vGapExtend;\r
+\r
+ __m128i vTemp;\r
+ __m128i vZero;\r
+\r
+ __m128i *pvScore;\r
+\r
+ /* remove unreferenced warning */\r
+ querySeq;\r
+\r
+ /* Load the bias to all elements of a constant */\r
+ dup = (bias << 8) | (bias & 0x00ff);\r
+ vBias = _mm_insert_epi16 (vBias, dup, 0);\r
+ vBias = _mm_shufflelo_epi16 (vBias, 0);\r
+ vBias = _mm_shuffle_epi32 (vBias, 0);\r
+\r
+ /* Load gap opening penalty to all elements of a constant */\r
+ dup = (gapOpen << 8) | (gapOpen & 0x00ff);\r
+ vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0);\r
+ vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
+ vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
+\r
+ /* Load gap extension penalty to all elements of a constant */\r
+ dup = (gapExtend << 8) | (gapExtend & 0x00ff);\r
+ vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0);\r
+ vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
+ vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
+\r
+ vMaxScore = _mm_xor_si128 (vMaxScore, vMaxScore);\r
+\r
+ vZero = _mm_xor_si128 (vZero, vZero);\r
+\r
+ /* Zero out the storage vector */\r
+ for (i = 0; i < iter; i++)\r
+ {\r
+ _mm_store_si128 (pvE + i, vMaxScore);\r
+ _mm_store_si128 (pvHStore + i, vMaxScore);\r
+ }\r
+\r
+ for (i = 0; i < dbLength; ++i)\r
+ {\r
+ /* fetch first data asap. */\r
+ pvScore = pvQueryProf + dbSeq[i] * iter;\r
+\r
+ /* zero out F. */\r
+ vF = _mm_xor_si128 (vF, vF);\r
+\r
+ /* load the next h value */\r
+ vH = _mm_load_si128 (pvHStore + iter - 1);\r
+ vH = _mm_slli_si128 (vH, 1);\r
+\r
+ pv = pvHLoad;\r
+ pvHLoad = pvHStore;\r
+ pvHStore = pv;\r
+\r
+ for (j = 0; j < iter; j++)\r
+ {\r
+ /* load values of vF and vH from previous row (one unit up) */\r
+ vE = _mm_load_si128 (pvE + j);\r
+\r
+ /* add score to vH */\r
+ vH = _mm_adds_epu8 (vH, *(pvScore + j));\r
+ vH = _mm_subs_epu8 (vH, vBias);\r
+\r
+ /* Update highest score encountered this far */\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vH);\r
+\r
+ /* get max from vH, vE and vF */\r
+ vH = _mm_max_epu8 (vH, vE);\r
+ vH = _mm_max_epu8 (vH, vF);\r
+\r
+ /* save vH values */\r
+ _mm_store_si128 (pvHStore + j, vH);\r
+\r
+ /* update vE value */\r
+ vH = _mm_subs_epu8 (vH, vGapOpen);\r
+ vE = _mm_subs_epu8 (vE, vGapExtend);\r
+ vE = _mm_max_epu8 (vE, vH);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epu8 (vF, vGapExtend);\r
+ vF = _mm_max_epu8 (vF, vH);\r
+\r
+ /* save vE values */\r
+ _mm_store_si128 (pvE + j, vE);\r
+\r
+ /* load the next h value */\r
+ vH = _mm_load_si128 (pvHLoad + j);\r
+ }\r
+\r
+ /* reset pointers to the start of the saved data */\r
+ j = 0;\r
+ vH = _mm_load_si128 (pvHStore + j);\r
+\r
+ /* the computed vF value is for the given column. since */\r
+ /* we are at the end, we need to shift the vF value over */\r
+ /* to the next column. */\r
+ vF = _mm_slli_si128 (vF, 1);\r
+ vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
+ vTemp = _mm_subs_epu8 (vF, vTemp);\r
+ vTemp = _mm_cmpeq_epi8 (vTemp, vZero);\r
+ cmp = _mm_movemask_epi8 (vTemp);\r
+\r
+ while (cmp != 0xffff) \r
+ {\r
+ vE = _mm_load_si128 (pvE + j);\r
+\r
+ vH = _mm_max_epu8 (vH, vF);\r
+\r
+ /* save vH values */\r
+ _mm_store_si128 (pvHStore + j, vH);\r
+\r
+ /* update vE incase the new vH value would change it */\r
+ vH = _mm_subs_epu8 (vH, vGapOpen);\r
+ vE = _mm_max_epu8 (vE, vH);\r
+ _mm_store_si128 (pvE + j, vE);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epu8 (vF, vGapExtend);\r
+\r
+ j++;\r
+ if (j >= iter)\r
+ {\r
+ j = 0;\r
+ vF = _mm_slli_si128 (vF, 1);\r
+ }\r
+\r
+ vH = _mm_load_si128 (pvHStore + j);\r
+\r
+ vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
+ vTemp = _mm_subs_epu8 (vF, vTemp);\r
+ vTemp = _mm_cmpeq_epi8 (vTemp, vZero);\r
+ cmp = _mm_movemask_epi8 (vTemp);\r
+ }\r
+ }\r
+\r
+ /* find largest score in the vMaxScore vector */\r
+ vTemp = _mm_srli_si128 (vMaxScore, 8);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 4);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 2);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 1);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+\r
+ /* store in temporary variable */\r
+ score = _mm_extract_epi16 (vMaxScore, 0);\r
+ score = score & 0x00ff;\r
+\r
+ /* check if we might have overflowed */\r
+ if (score + bias >= 255)\r
+ {\r
+ score = 255;\r
+ }\r
+\r
+ /* return largest score */\r
+ return score;\r
+}\r
--- /dev/null
+/******************************************************************
+ Copyright 2006 by Michael Farrar. All rights reserved.
+ This program may not be sold or incorporated into a commercial product,
+ in whole or in part, without written consent of Michael Farrar. For
+ further information regarding permission for use or reproduction, please
+ contact: Michael Farrar at farrar.michael@gmail.com.
+*******************************************************************/
+
+/*
+ Written by Michael Farrar, 2006.
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.
+*/
+
+#ifndef INCLUDE_SWSTRIPED_H
+#define INCLUDE_SWSTRIPED_H
+
+#include <emmintrin.h>
+
+#include "swsse2.h"
+#include "fastalib.h"
+
+SW_DATA *
+swStripedInit (unsigned char *querySeq,
+ int queryLength,
+ signed char *matrix);
+
+
+void
+swStripedScan (unsigned char *querySeq,
+ int queryLength,
+ FASTA_LIB *dbLib,
+ void *swData,
+ SEARCH_OPTIONS *options,
+ SCORE_LIST *scores);
+
+
+void
+swStripedComplete (SW_DATA *pSwData);
+
+int
+swStripedWord (unsigned char *querySeq,
+ int queryLength,
+ unsigned char *dbSeq,
+ int dbLength,
+ unsigned short gapOpen,
+ unsigned short gapExtend,
+ __m128i *queryProf,
+ __m128i *pvH1,
+ __m128i *pvH2,
+ __m128i *pvE);
+
+
+int
+swStripedByte (unsigned char *querySeq,
+ int queryLength,
+ unsigned char *dbSeq,
+ int dbLength,
+ unsigned short gapOpen,
+ unsigned short gapExtend,
+ __m128i *queryProf,
+ __m128i *pvH1,
+ __m128i *pvH2,
+ __m128i *pvE,
+ unsigned short bias);
+
+typedef struct {
+ __m128i *pvbQueryProf;
+ __m128i *pvsQueryProf;
+ __m128i *pvH1;
+ __m128i *pvH2;
+ __m128i *pvE;
+ unsigned char *pData;
+ unsigned short bias;
+} SwStripedData;
+
+
+
+#endif /* INCLUDE_SWSTRIPED_H */
--- /dev/null
+/******************************************************************\r
+ Copyright 2006 by Michael Farrar. All rights reserved.\r
+ This program may not be sold or incorporated into a commercial product,\r
+ in whole or in part, without written consent of Michael Farrar. For \r
+ further information regarding permission for use or reproduction, please \r
+ contact: Michael Farrar at farrar.michael@gmail.com.\r
+*******************************************************************/\r
+\r
+/* Written by Michael Farrar, 2006.\r
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
+*/\r
+\r
+/* Implementation of the Wozniak "vertical" vectorization\r
+ strategy for Smith-Waterman comparison, Wozniak (1997) Comp.\r
+ Appl. Biosci. 13:145-150\r
+*/\r
+\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <emmintrin.h>\r
+\r
+#include "swsse2.h"\r
+#include "swwozniak.h"\r
+\r
+#define MATRIX_ROW_SIZE 32\r
+#define MATRIX_SIZE (MATRIX_ROW_SIZE * (ALPHA_SIZE + 1))\r
+\r
+typedef struct {\r
+ char *pbMatrix;\r
+ short *psMatrix;\r
+ __m128i *pvHStore;\r
+ __m128i *pvEStore;\r
+ unsigned char *pData;\r
+ unsigned short bias;\r
+} SwWozniakData;\r
+\r
+int\r
+swWozniakWord (unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ short *pMatrix,\r
+ __m128i *pvHStore,\r
+ __m128i *pvEStore);\r
+\r
+int\r
+swWozniakByte (unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ char *pMatrix,\r
+ __m128i *pvHStore,\r
+ __m128i *pvEStore,\r
+ unsigned short bias);\r
+\r
+void *\r
+swWozniakInit(unsigned char *querySeq,\r
+ int queryLength,\r
+ signed char *matrix)\r
+{\r
+ int i, j;\r
+\r
+ int nCount;\r
+\r
+ int lenQryByte;\r
+ int lenQryShort;\r
+\r
+ int bias;\r
+\r
+ int weight;\r
+\r
+ short *ps;\r
+ char *pc;\r
+\r
+ signed char *matrixRow;\r
+\r
+ size_t aligned;\r
+\r
+ SwWozniakData *pSwData;\r
+ \r
+ lenQryByte = (queryLength + 15) / 16 + 2;\r
+ lenQryShort = (queryLength + 7) / 8 + 2;\r
+\r
+ pSwData = (SwWozniakData *) malloc (sizeof (SwWozniakData));\r
+ if (!pSwData) {\r
+ fprintf (stderr, "Unable to allocate memory for SW data\n");\r
+ exit (-1);\r
+ }\r
+\r
+ nCount = 64 + /* slack bytes */\r
+ 4 * (ALPHA_SIZE + 1) + /* byte matrix */\r
+ 4 * (ALPHA_SIZE + 1) + /* short matrix */\r
+ ((queryLength + 16) * 2); /* vH and vE */\r
+\r
+\r
+ pSwData->pData = (unsigned char *) calloc (nCount, sizeof (__m128i));\r
+ if (!pSwData->pData) {\r
+ fprintf (stderr, "Unable to allocate memory for SW data buffers\n");\r
+ exit (-1);\r
+ }\r
+\r
+ pSwData->pbMatrix = (char *) pSwData->pData;\r
+ pSwData->psMatrix = (short *) (pSwData->pbMatrix + MATRIX_SIZE);\r
+\r
+ /* since we might port this to another platform, lets align the data */\r
+ /* to 16 byte boundries ourselves */\r
+ aligned = (size_t) (pSwData->psMatrix + MATRIX_SIZE);\r
+ aligned = (aligned + 15) & ~(0x0f);\r
+\r
+ pSwData->pvHStore = (__m128i *) aligned;\r
+ pSwData->pvEStore = pSwData->pvHStore + queryLength + 16;\r
+\r
+ /* Find the bias to use in the substitution matrix */\r
+ bias = 127;\r
+ for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) {\r
+ if (matrix[i] < bias) {\r
+ bias = matrix[i];\r
+ }\r
+ }\r
+ if (bias > 0) {\r
+ bias = 0;\r
+ }\r
+\r
+ pc = pSwData->pbMatrix;\r
+ ps = pSwData->psMatrix;\r
+\r
+ for (i = 0; i < ALPHA_SIZE; i++) {\r
+ matrixRow = matrix + i * ALPHA_SIZE;\r
+\r
+ for (j = 0; j < ALPHA_SIZE; j++) {\r
+ weight = matrixRow[j];\r
+ *pc++ = weight - bias;\r
+ *ps++ = weight;\r
+ }\r
+\r
+ for ( ; j < MATRIX_ROW_SIZE; j++) {\r
+ *pc++ = -bias;\r
+ *ps++ = 0;\r
+ }\r
+ }\r
+\r
+ /* add the weights for the NULL rows */\r
+ for (j = 0; j < MATRIX_ROW_SIZE; j++) {\r
+ *pc++ = -bias;\r
+ *ps++ = 0;\r
+ }\r
+\r
+ pSwData->bias = (unsigned short) -bias;\r
+\r
+ return pSwData;\r
+}\r
+\r
+void swWozniakScan (unsigned char *querySeq,\r
+ int queryLength,\r
+ FASTA_LIB *dbLib,\r
+ void *swData,\r
+ SEARCH_OPTIONS *options,\r
+ SCORE_LIST *scores)\r
+{\r
+ int score;\r
+\r
+ int threshold = options->threshold;\r
+\r
+ unsigned char *dbSeq;\r
+ int dbLen;\r
+\r
+ int gapInit = -(options->gapInit + options->gapExt);\r
+ int gapExt = -options->gapExt;\r
+\r
+ SwWozniakData *wozniakData = (SwWozniakData *) swData;\r
+\r
+ dbSeq = nextSeq (dbLib, &dbLen);\r
+ while (dbLen > 0) {\r
+\r
+ score = swWozniakByte (querySeq, queryLength, \r
+ dbSeq, dbLen, \r
+ gapInit, gapExt, \r
+ wozniakData->pbMatrix,\r
+ wozniakData->pvHStore,\r
+ wozniakData->pvEStore,\r
+ wozniakData->bias);\r
+\r
+ /* check if needs a run with higher precision */\r
+ if (score >= 255) {\r
+ score = swWozniakWord (querySeq, queryLength, \r
+ dbSeq, dbLen, \r
+ gapInit, gapExt, \r
+ wozniakData->psMatrix,\r
+ wozniakData->pvHStore,\r
+ wozniakData->pvEStore);\r
+ }\r
+\r
+ if (score >= threshold) {\r
+ int minScore = insertList (scores, score, seqName (dbLib));\r
+ if (minScore >= threshold) {\r
+ threshold = minScore;\r
+ }\r
+ }\r
+\r
+ dbSeq = nextSeq (dbLib, &dbLen);\r
+ }\r
+}\r
+\r
+void\r
+swWozniakComplete(void *pSwData)\r
+{\r
+ SwWozniakData *pWozniakData = (SwWozniakData *) pSwData;\r
+\r
+ free (pWozniakData->pData);\r
+ free (pWozniakData);\r
+}\r
+\r
+\r
+int\r
+swWozniakWord (unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ short *pMatrix,\r
+ __m128i *pvHStore,\r
+ __m128i *pvEStore)\r
+{\r
+ int i, j, k, l, m;\r
+ int score;\r
+\r
+ short *pScore;\r
+\r
+ __m128i vE, vF, vH;\r
+ __m128i vEUp, vHUp1, vHUp2;\r
+\r
+ __m128i vMaxScore;\r
+ __m128i vGapOpen;\r
+ __m128i vGapExtend;\r
+ __m128i vScore;\r
+\r
+ __m128i vMin;\r
+ __m128i vMinimums;\r
+ __m128i vTemp;\r
+\r
+ /* remove unreferenced warning */\r
+ querySeq;\r
+\r
+ /* Load gap opening penalty to all elements of a constant */\r
+ vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0);\r
+ vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
+ vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
+\r
+ /* Load gap extension penalty to all elements of a constant */\r
+ vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0);\r
+ vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
+ vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
+\r
+ /* load vMaxScore with the zeros. since we are using signed */\r
+ /* math, we will bias the maxscore to -32768 so we have the */\r
+ /* full range of the short. */\r
+ vMaxScore = _mm_cmpeq_epi16 (vMaxScore, vMaxScore);\r
+ vMaxScore = _mm_slli_epi16 (vMaxScore, 15);\r
+\r
+ vMinimums = _mm_shuffle_epi32 (vMaxScore, 0);\r
+\r
+ vMin = _mm_shuffle_epi32 (vMaxScore, 0);\r
+ vMin = _mm_srli_si128 (vMin, 14);\r
+\r
+ for (i = 0; i < queryLength + 8; i++)\r
+ {\r
+ _mm_store_si128 (pvEStore + i, vMaxScore);\r
+ _mm_store_si128 (pvHStore + i, vMaxScore);\r
+ }\r
+\r
+ pScore = (short *) &vScore;\r
+\r
+ for (i = 0; i < dbLength; i += 8, dbSeq += 8)\r
+ {\r
+ /* zero lots of stuff. */\r
+ vE = _mm_shuffle_epi32 (vMinimums, 0);\r
+ vF = _mm_shuffle_epi32 (vMinimums, 0);\r
+ vH = _mm_shuffle_epi32 (vMinimums, 0);\r
+ vHUp2 = _mm_shuffle_epi32 (vMinimums, 0);\r
+\r
+ vScore = _mm_xor_si128 (vScore, vScore);\r
+\r
+ for (j = 0; j < 8; ++j)\r
+ {\r
+ for (k = 0; k <= j; ++k) {\r
+ int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
+ pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
+ }\r
+ for ( ; k < 8; ++k) {\r
+ pScore[k] = 0;\r
+ }\r
+\r
+ /* load values of vE and vH from previous row (one unit up) */\r
+ vEUp = _mm_load_si128 (pvEStore + j);\r
+ vHUp1 = _mm_load_si128 (pvHStore + j);\r
+\r
+ /* shift into place so we have complete vE and vH vectors */\r
+ /* that refer to the values one unit up from each cell */\r
+ /* that we are currently working on. */\r
+ vTemp = _mm_slli_si128 (vE, 2);\r
+ vEUp = _mm_srli_si128 (vEUp, 14);\r
+ vEUp = _mm_or_si128 (vEUp, vTemp);\r
+\r
+ vTemp = _mm_slli_si128 (vH, 2);\r
+ vHUp1 = _mm_srli_si128 (vHUp1, 14);\r
+ vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
+\r
+ /* do the dynamic programming */\r
+\r
+ /* update vE value */\r
+ vE = _mm_subs_epi16 (vEUp, vGapExtend);\r
+ vTemp = _mm_subs_epi16 (vHUp1, vGapOpen);\r
+ vE = _mm_max_epi16 (vE, vTemp);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epi16 (vF, vGapExtend);\r
+ vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
+ vF = _mm_max_epi16 (vF, vTemp);\r
+\r
+ /* add score to vH */\r
+ vH = _mm_adds_epi16 (vHUp2, vScore);\r
+\r
+ /* set vH to max of vH, vE, vF */\r
+ vH = _mm_max_epi16 (vH, vE);\r
+ vH = _mm_max_epi16 (vH, vF);\r
+\r
+ /* Save value to use for next diagonal vH */\r
+ vHUp2 = vHUp1;\r
+\r
+ /* Update highest score encountered this far */\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vH);\r
+ }\r
+\r
+ for (l = 0; j < queryLength; ++j, ++l)\r
+ {\r
+ for (k = 0; k < 8; ++k) {\r
+ int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
+ pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
+ }\r
+\r
+ /* load values of vE and vH from previous row (one unit up) */\r
+ vEUp = _mm_load_si128 (pvEStore + j);\r
+ vHUp1 = _mm_load_si128 (pvHStore + j);\r
+\r
+ /* save old values of vE and vH to use on next row */\r
+ _mm_store_si128 (pvEStore + l, vE);\r
+ _mm_store_si128 (pvHStore + l, vH);\r
+ \r
+ /* shift into place so we have complete vE and vH vectors */\r
+ /* that refer to the values one unit up from each cell */\r
+ /* that we are currently working on. */\r
+ vTemp = _mm_slli_si128 (vE, 2);\r
+ vEUp = _mm_srli_si128 (vEUp, 14);\r
+ vEUp = _mm_or_si128 (vEUp, vTemp);\r
+\r
+ vTemp = _mm_slli_si128 (vH, 2);\r
+ vHUp1 = _mm_srli_si128 (vHUp1, 14);\r
+ vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
+\r
+ /* do the dynamic programming */\r
+\r
+ /* update vE value */\r
+ vE = _mm_subs_epi16 (vEUp, vGapExtend);\r
+ vTemp = _mm_subs_epi16 (vHUp1, vGapOpen);\r
+ vE = _mm_max_epi16 (vE, vTemp);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epi16 (vF, vGapExtend);\r
+ vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
+ vF = _mm_max_epi16 (vF, vTemp);\r
+\r
+ /* add score to vH */\r
+ vH = _mm_adds_epi16(vHUp2, vScore);\r
+\r
+ /* set vH to max of vH, vE, vF */\r
+ vH = _mm_max_epi16 (vH, vE);\r
+ vH = _mm_max_epi16 (vH, vF);\r
+\r
+ /* Save value to use for next diagonal vH */\r
+ vHUp2 = vHUp1;\r
+\r
+ /* Update highest score encountered this far */\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vH);\r
+ }\r
+\r
+ for (m = 0 ; m < 7; ++j, ++l, ++m)\r
+ {\r
+ for (k = 0; k <= m; ++k) {\r
+ pScore[k] = 0;\r
+ }\r
+ for ( ; k < 8; ++k) {\r
+ int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
+ pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
+ }\r
+\r
+ /* save old values of vE and vH to use on next row */\r
+ _mm_store_si128 (pvEStore + l, vE);\r
+ _mm_store_si128 (pvHStore + l, vH);\r
+\r
+ /* v_score_load contains all zeros */\r
+ vTemp = _mm_slli_si128 (vE, 2);\r
+ vEUp = _mm_or_si128 (vMin, vTemp);\r
+ vTemp = _mm_slli_si128 (vH, 2);\r
+ vHUp1 = _mm_or_si128 (vMin, vTemp);\r
+\r
+ /* do the dynamic programming */\r
+\r
+ /* update vE value */\r
+ vE = _mm_subs_epi16 (vEUp, vGapExtend);\r
+ vTemp = _mm_subs_epi16 (vHUp1, vGapOpen);\r
+ vE = _mm_max_epi16 (vE, vTemp);\r
+\r
+ /* update vF value */\r
+ vF = _mm_subs_epi16 (vF, vGapExtend);\r
+ vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
+ vF = _mm_max_epi16 (vF, vTemp);\r
+\r
+ /* add score to vH */\r
+ vH = _mm_adds_epi16 (vHUp2, vScore);\r
+\r
+ /* set vH to max of vH, vE, vF */\r
+ vH = _mm_max_epi16 (vH, vE);\r
+ vH = _mm_max_epi16 (vH, vF);\r
+\r
+ /* Save value to use for next diagonal vH */\r
+ vHUp2 = vHUp1;\r
+\r
+ /* Update highest score encountered this far */\r
+ vMaxScore = _mm_max_epi16(vMaxScore,vH);\r
+ }\r
+\r
+ _mm_store_si128 (pvEStore + l, vE);\r
+ _mm_store_si128 (pvHStore + l, vH);\r
+ }\r
+\r
+ /* find largest score in the vMaxScore vector */\r
+ vTemp = _mm_srli_si128 (vMaxScore, 8);\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 4);\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 2);\r
+ vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
+\r
+ /* store in temporary variable */\r
+ score = (short) _mm_extract_epi16 (vMaxScore, 0);\r
+\r
+ /* return largest score */\r
+ return score + SHORT_BIAS;\r
+}\r
+\r
+\r
+\r
+\r
+int\r
+swWozniakByte (unsigned char *querySeq,\r
+ int queryLength,\r
+ unsigned char *dbSeq,\r
+ int dbLength,\r
+ unsigned short gapOpen,\r
+ unsigned short gapExtend,\r
+ char *pMatrix,\r
+ __m128i *pvHStore,\r
+ __m128i *pvEStore,\r
+ unsigned short bias)\r
+{\r
+ int i, j, k, l, m;\r
+ int score;\r
+\r
+ int dup;\r
+\r
+ char *pScore;\r
+\r
+ __m128i vE, vF, vH;\r
+ __m128i vEUp, vHUp1, vHUp2;\r
+\r
+ __m128i vMaxScore;\r
+ __m128i vBias;\r
+ __m128i vGapOpen;\r
+ __m128i vGapExtend;\r
+ __m128i vScore;\r
+\r
+ __m128i vTemp;\r
+\r
+ /* remove unreferenced warning */\r
+ querySeq;\r
+\r
+ /* Load the bias to all elements of a constant */\r
+ dup = (bias << 8) | (bias & 0x00ff);\r
+ vBias = _mm_insert_epi16 (vBias, dup, 0);\r
+ vBias = _mm_shufflelo_epi16 (vBias, 0);\r
+ vBias = _mm_shuffle_epi32 (vBias, 0);\r
+\r
+ /* Load gap opening penalty to all elements of a constant */\r
+ dup = (gapOpen << 8) | (gapOpen & 0x00ff);\r
+ vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0);\r
+ vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
+ vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
+\r
+ /* Load gap extension penalty to all elements of a constant */\r
+ dup = (gapExtend << 8) | (gapExtend & 0x00ff);\r
+ vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0);\r
+ vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
+ vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
+\r
+ vScore = _mm_xor_si128 (vScore, vScore);\r
+ vMaxScore = _mm_xor_si128 (vMaxScore, vMaxScore);\r
+\r
+ for (i = 0; i < queryLength + 16; i++)\r
+ {\r
+ _mm_store_si128 (pvEStore + i, vMaxScore);\r
+ _mm_store_si128 (pvHStore + i, vMaxScore);\r
+ }\r
+\r
+ pScore = (char *) &vScore;\r
+\r
+ for (i = 0; i < dbLength; i += 16, dbSeq += 16)\r
+ {\r
+ // zero lots of stuff.\r
+ vE = _mm_xor_si128 (vE, vE);\r
+ vF = _mm_xor_si128 (vF, vF);\r
+ vH = _mm_xor_si128 (vH, vH);\r
+ vHUp2 = _mm_xor_si128 (vHUp2, vHUp2);\r
+\r
+ vScore = _mm_xor_si128 (vScore, vScore);\r
+\r
+ for (j = 0; j < 16; ++j)\r
+ {\r
+ for (k = 0; k <= j; ++k) {\r
+ int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
+ pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
+ }\r
+ for ( ; k < 16; ++k) {\r
+ pScore[k] = (char) bias;\r
+ }\r
+\r
+ // load values of vE and vH from previous row (one unit up)\r
+ vEUp = _mm_load_si128 (pvEStore + j);\r
+ vHUp1 = _mm_load_si128 (pvHStore + j);\r
+\r
+ // shift into place so we have complete vE and vH vectors\r
+ // that refer to the values one unit up from each cell\r
+ // that we are currently working on.\r
+ vTemp = _mm_slli_si128 (vE, 1);\r
+ vEUp = _mm_srli_si128 (vEUp, 15);\r
+ vEUp = _mm_or_si128 (vEUp, vTemp);\r
+\r
+ vTemp = _mm_slli_si128 (vH, 1);\r
+ vHUp1 = _mm_srli_si128 (vHUp1, 15);\r
+ vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
+\r
+ // do the dynamic programming\r
+\r
+ // update vE value\r
+ vE = _mm_subs_epu8 (vEUp, vGapExtend);\r
+ vTemp = _mm_subs_epu8 (vHUp1, vGapOpen);\r
+ vE = _mm_max_epu8 (vE, vTemp);\r
+\r
+ // update vF value\r
+ vF = _mm_subs_epu8 (vF, vGapExtend);\r
+ vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
+ vF = _mm_max_epu8 (vF, vTemp);\r
+\r
+ // add score to vH\r
+ vH = _mm_adds_epu8 (vHUp2, *((__m128i *) pScore));\r
+ vH = _mm_subs_epu8 (vH, vBias);\r
+\r
+ // set vH to max of vH, vE, vF\r
+ vH = _mm_max_epu8 (vH, vE);\r
+ vH = _mm_max_epu8 (vH, vF);\r
+\r
+ // Save value to use for next diagonal vH\r
+ vHUp2 = vHUp1;\r
+\r
+ // Update highest score encountered this far\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vH);\r
+ }\r
+\r
+ for (l = 0; j < queryLength; ++j, ++l)\r
+ {\r
+ for (k = 0; k < 16; ++k) {\r
+ int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
+ pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
+ }\r
+\r
+ // load values of vE and vH from previous row (one unit up)\r
+ vEUp = _mm_load_si128 (pvEStore + j);\r
+ vHUp1 = _mm_load_si128 (pvHStore + j);\r
+\r
+ // save old values of vE and vH to use on next row\r
+ _mm_store_si128 (pvEStore + l, vE);\r
+ _mm_store_si128 (pvHStore + l, vH);\r
+ \r
+ // shift into place so we have complete vE and vH vectors\r
+ // that refer to the values one unit up from each cell\r
+ // that we are currently working on.\r
+ vTemp = _mm_slli_si128 (vE, 1);\r
+ vEUp = _mm_srli_si128 (vEUp, 15);\r
+ vEUp = _mm_or_si128 (vEUp, vTemp);\r
+\r
+ vTemp = _mm_slli_si128 (vH, 1);\r
+ vHUp1 = _mm_srli_si128 (vHUp1, 15);\r
+ vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
+\r
+ // do the dynamic programming\r
+\r
+ // update vE value\r
+ vE = _mm_subs_epu8 (vEUp, vGapExtend);\r
+ vTemp = _mm_subs_epu8 (vHUp1, vGapOpen);\r
+ vE = _mm_max_epu8 (vE, vTemp);\r
+\r
+ // update vF value\r
+ vF = _mm_subs_epu8 (vF, vGapExtend);\r
+ vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
+ vF = _mm_max_epu8 (vF, vTemp);\r
+\r
+ // add score to vH\r
+ vH = _mm_adds_epu8(vHUp2, vScore);\r
+ vH = _mm_subs_epu8 (vH, vBias);\r
+\r
+ // set vH to max of vH, vE, vF\r
+ vH = _mm_max_epu8 (vH, vE);\r
+ vH = _mm_max_epu8 (vH, vF);\r
+\r
+ // Save value to use for next diagonal vH\r
+ vHUp2 = vHUp1;\r
+\r
+ // Update highest score encountered this far\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vH);\r
+ }\r
+\r
+ for (m = 0 ; m < 15; ++j, ++l, ++m)\r
+ {\r
+ for (k = 0; k <= m; ++k) {\r
+ pScore[k] = (char) bias;\r
+ }\r
+ for ( ; k < 16; ++k) {\r
+ int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
+ pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
+ }\r
+\r
+ // save old values of vE and vH to use on next row\r
+ _mm_store_si128 (pvEStore + l, vE);\r
+ _mm_store_si128 (pvHStore + l, vH);\r
+\r
+ // v_score_load contains all zeros\r
+ vEUp = _mm_slli_si128 (vE, 1);\r
+ vHUp1 = _mm_slli_si128 (vH, 1);\r
+\r
+ // do the dynamic programming\r
+\r
+ // update vE value\r
+ vE = _mm_subs_epu8 (vEUp, vGapExtend);\r
+ vTemp = _mm_subs_epu8 (vHUp1, vGapOpen);\r
+ vE = _mm_max_epu8 (vE, vTemp);\r
+\r
+ // update vF value\r
+ vF = _mm_subs_epu8 (vF, vGapExtend);\r
+ vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
+ vF = _mm_max_epu8 (vF, vTemp);\r
+\r
+ // add score to vH\r
+ vH = _mm_adds_epu8 (vHUp2, vScore);\r
+ vH = _mm_subs_epu8 (vH, vBias);\r
+\r
+ // set vH to max of vH, vE, vF\r
+ vH = _mm_max_epu8 (vH, vE);\r
+ vH = _mm_max_epu8 (vH, vF);\r
+\r
+ // Save value to use for next diagonal vH\r
+ vHUp2 = vHUp1;\r
+\r
+ // Update highest score encountered this far\r
+ vMaxScore = _mm_max_epu8(vMaxScore,vH);\r
+ }\r
+\r
+ _mm_store_si128 (pvEStore + l, vE);\r
+ _mm_store_si128 (pvHStore + l, vH);\r
+ }\r
+\r
+ // find largest score in the vMaxScore vector\r
+ vTemp = _mm_srli_si128 (vMaxScore, 8);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 4);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 2);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+ vTemp = _mm_srli_si128 (vMaxScore, 1);\r
+ vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
+\r
+ // store in temporary variable\r
+ score = (short) _mm_extract_epi16 (vMaxScore, 0);\r
+ score = score & 0x00ff;\r
+\r
+ // check if we might have overflowed\r
+ if (score + bias >= 255)\r
+ {\r
+ score = 255;\r
+ }\r
+\r
+\r
+ // return largest score\r
+ return score;\r
+}\r
--- /dev/null
+/******************************************************************\r
+ Copyright 2006 by Michael Farrar. All rights reserved.\r
+ This program may not be sold or incorporated into a commercial product,\r
+ in whole or in part, without written consent of Michael Farrar. For \r
+ further information regarding permission for use or reproduction, please \r
+ contact: Michael Farrar at farrar.michael@gmail.com.\r
+*******************************************************************/\r
+\r
+/*\r
+ Written by Michael Farrar, 2006.\r
+ Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
+*/\r
+\r
+#ifndef INCLUDE_SWWOZNIAK_H\r
+#define INCLUDE_SWWOZNIAK_H\r
+\r
+#include "swsse2.h"\r
+#include "fastalib.h"\r
+\r
+#define MATRIX_ROW_SIZE 32\r
+\r
+SW_DATA *\r
+swWozniakInit (unsigned char *querySeq,\r
+ int queryLength,\r
+ signed char *matrix);\r
+\r
+\r
+void \r
+swWozniakScan (unsigned char *querySeq,\r
+ int queryLength,\r
+ FASTA_LIB *dbLib,\r
+ void *swData,\r
+ SEARCH_OPTIONS *options,\r
+ SCORE_LIST *scores);\r
+\r
+void\r
+swWozniakComplete (SW_DATA *pSwData);\r
+\r
+\r
+#endif /* INCLUDE_SWWOZNIAK_H */\r