CXXFLAGS=$CFLAGS
AC_CONFIG_FILES( [Makefile
- src/Makefile
- src/swsse2/Makefile] )
+ src/Makefile] )
AC_OUTPUT
-SUBDIRS = swsse2
-
bin_PROGRAMS = fastq-grep fastq-kmers fastq-match
-fastq_grep_SOURCES = kseq.h fastq-grep.c fastq-parse.c fastq-common.c
+fastq_common_src=fastq-common.h fastq-common.c
+fastq_parse_src=fastq-parse.h fastq-parse.c
+fastq_sw_src=fastq-sw.h fastq-sw.c
+
+fastq_grep_SOURCES = fastq-grep.c $(fastq_common_src) $(fastq_parse_src)
fastq_grep_LDADD = $(PCRE_LIBS) -lz
-fastq_kmers_SOURCES = kseq.h fastq-kmers.c fastq-parse.c fastq-common.c
+fastq_kmers_SOURCES = fastq-kmers.c $(fastq_common_src) $(fastq_parse_src)
fastq_kmers_LDADD = -lz
-fastq_match_SOURCES = kseq.h fastq-match.c fastq-parse.c fastq-common.c
-fastq_match_LDADD = swsse2/libswsse2.la -lz
-fastq_match_CFLAGS = -msse2
+fastq_match_SOURCES = fastq-match.c $(fastq_common_src) $(fastq_parse_src) $(fastq_sw_src)
+fastq_match_LDADD = -lz
+
#include "fastq-common.h"
#include "fastq-parse.h"
-#include "swsse2/blosum62.h"
-#include "swsse2/swsse2.h"
-#include "swsse2/matrix.h"
-#include "swsse2/swstriped.h"
+#include "fastq-sw.h"
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
}
-void convert_sequence(unsigned char* s, int n)
-{
- int i;
- for (i = 0; i < n; i++) {
- s[i] = (char)AMINO_ACID_VALUE[(int)s[i]];
- }
-}
void fastq_match(FILE* fin, FILE* fout,
- SwStripedData* sw_data,
- unsigned char* query, int n,
- SEARCH_OPTIONS* options)
+ sw_t* sw,
+ unsigned char* query, int n)
{
int score;
- int gap_init = -(options->gapInit + options->gapExt);
- int gap_ext = -options->gapExt;
- int threshold = options->threshold;
-
fastq_t* fqf = fastq_open(fin);
seq_t* seq = fastq_alloc_seq();
while (fastq_next(fqf, seq)) {
fprintf(fout, "%s\t", seq->seq.s);
- convert_sequence((unsigned char*)seq->seq.s, seq->seq.n);
-
- score = swStripedByte(query, n,
- (unsigned char*)seq->seq.s, seq->seq.n,
- gap_init, gap_ext,
- sw_data->pvbQueryProf,
- sw_data->pvH1,
- sw_data->pvH2,
- sw_data->pvE,
- sw_data->bias);
- if (score >= 255) {
- score = swStripedWord(query, n,
- (unsigned char*)seq->seq.s, seq->seq.n,
- gap_init, gap_ext,
- sw_data->pvbQueryProf,
- sw_data->pvH1,
- sw_data->pvH2,
- sw_data->pvE);
- }
+ fastq_sw_conv_seq((unsigned char*)seq->seq.s, seq->seq.n);
+ score = fastq_sw(sw, (unsigned char*)seq->seq.s, seq->seq.n);
fprintf(fout, "%d\n", score);
}
unsigned char* query;
int query_len;
- SwStripedData* sw_data;
- signed char* mat = blosum62;
- SEARCH_OPTIONS options;
- options.gapInit = -10;
- options.gapExt = -2;
- options.threshold = -1;
+ sw_t* sw;
FILE* fin;
static struct option long_options[] =
{
- {"help", no_argument, &help_flag, 1},
+ {"help", no_argument, &help_flag, 1},
+ {"gap-init", required_argument, NULL, 0},
+ {"gap-extend", required_argument, NULL, 0},
{0, 0, 0, 0}
};
query = (unsigned char*)argv[optind++];
query_len = strlen((char*)query);
- convert_sequence(query, query_len);
+ fastq_sw_conv_seq(query, query_len);
- sw_data = swStripedInit(query, query_len, mat);
+ sw = fastq_alloc_sw(query, query_len);
if (optind >= argc || (argc - optind == 1 && strcmp(argv[optind],"-") == 0)) {
- fastq_match(stdin, stdout, sw_data, query, query_len, &options);
+ fastq_match(stdin, stdout, sw, query, query_len);
}
else {
for (; optind < argc; optind++) {
continue;
}
- fastq_match(fin, stdout, sw_data, query, query_len, &options);
+ fastq_match(fin, stdout, sw, query, query_len);
}
}
- swStripedComplete(sw_data);
+ fastq_free_sw(sw);
return 0;
}
--- /dev/null
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * This is a very simple, 'fast enough' implementation of the Smith-Waterman
+ * algorithm specifically for short nucleotide sequences, working in O(mn) time
+ * and O(m) space, implemented according to the original Gotoh paper and
+ * Phil Green's implementation in cross_match.
+ *
+ * There is no fancy bit packing or vectorization, but such features would offer
+ * diminishing returns when aligning short sequences such as high throughput
+ * sequencing data. For example the Farrar SSE2 algorithm might be a tiny bit
+ * faster, but would diminish portability.
+ *
+ */
+
+
+#include "fastq-sw.h"
+#include "fastq-common.h"
+#include <string.h>
+#include <stdlib.h>
+
+
+static const int sw_default_d[25] =
+ /* A C G T N */
+ { 1, -2, -2, -2 , 0,
+ -2, 1, -2, -2, 0,
+ -2, -2, 1, -2, 0,
+ -2, -2, -2, 1, 0,
+ 0, 0, 0, 0, 0 };
+
+static const int sw_mat50_d[25] =
+ { 2, -2, 0, -2 , 0,
+ -2, 2, -2, 0, 0,
+ 0, -2, 2, -2, 0,
+ -2, 0, -2, 2, 0,
+ 0, 0, 0, 0, 0 };
+
+static const int sw_mat70_d[25] =
+ { 2, -2, -1, -2 , 0,
+ -2, 2, -2, -1, 0,
+ -1, -2, 2, -2, 0,
+ -2, -1, -2, 2, 0,
+ 0, 0, 0, 0, 0 };
+
+
+static inline int imax(int a, int b)
+{
+ return a > b ? a : b;
+}
+
+static inline int imax4(int a, int b, int c, int d)
+{
+ return imax(imax(a,b), imax(c,d));
+}
+
+
+
+void fastq_sw_conv_seq(unsigned char* seq, int n)
+{
+ while (*seq || n--) {
+ switch (*seq) {
+ case 'A' :
+ case 'a':
+ case 'U':
+ case 'u':
+ *seq = 0;
+ break;
+
+ case 'C':
+ case 'c':
+ *seq = 1;
+ break;
+
+ case 'G':
+ case 'g':
+ *seq = 2;
+ break;
+
+ case 'T':
+ case 't':
+ *seq = 3;
+ break;
+
+ case 'N':
+ case 'n':
+ default:
+ *seq = 4;
+ }
+
+ seq++;
+ }
+}
+
+
+sw_t* fastq_alloc_sw(const unsigned char* subject, int size)
+{
+ sw_t* sw = malloc_or_die(sizeof(sw_t));
+ memcpy(sw->subject, subject, size);
+
+ /* default cost matrix */
+ memcpy(sw->d, sw_default_d, 25 * sizeof(int));
+
+ /* default gap costs */
+ sw->gap_open = -4;
+ sw->gap_extend = -3;
+
+ sw->work0 = malloc_or_die(size * sizeof(int));
+ sw->work1 = malloc_or_die(size * sizeof(int));
+ sw->size = size;
+
+ return sw;
+}
+
+
+void fastq_free_sw(sw_t* sw)
+{
+ free(sw->subject);
+ free(sw->work0);
+ free(sw->work1);
+ free(sw);
+}
+
+
+
+int fastq_sw(sw_t* sw, const unsigned char* x, int n)
+{
+ /*const int max_score = 65535;*/
+
+ /* conveniance */
+ int* maxstu = sw->work0;
+ int* t = sw->work1;
+ int m = sw->size;
+ const int* d = sw->d;
+ int gap_op = sw->gap_open;
+ int gap_ex = sw->gap_extend;
+ unsigned char* y = sw->subject;
+
+ int i, j;
+
+ /* zero maxstu row */
+ memset(maxstu, 0, m * sizeof(int));
+
+ /* initialize t row to a negative value to prohibit
+ * leading with gap extensions */
+ for (j = 0; j < m; j++) t[j] = -1;
+
+ int u, s;
+ int maxstu0;
+
+ for (i = 0; i < n; i++) {
+
+ /* special case for column 0 */
+ t[j] = imax(maxstu[j] + gap_op, t[j] + gap_ex);
+ u = gap_op;
+ maxstu[0] = imax4(0, d[5 * y[j] + x[j]], t[j], u);
+ maxstu0 = 0;
+
+
+ for (j = 1; j < m; j++) {
+ t[j] = imax(maxstu[j] + gap_op, t[j] + gap_ex);
+ u = imax(maxstu[j-1] + gap_op, u + gap_ex);
+ s = maxstu0 + d[5 * y[j] + x[i]];
+ maxstu0 = maxstu[j];
+
+ maxstu[j] = imax4(0, s, t[j], u);
+ }
+ }
+
+ /* return maximum from maxstu */
+ maxstu0 = 0;
+ for (j = 0; j <m; j++) maxstu0 = imax(maxstu0, maxstu[j]);
+
+ return maxstu0;
+}
+
+
+
--- /dev/null
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * fastq-sw :
+ * Local alignments of nucleotide sequences via Smith-Waterman.
+ *
+ */
+
+#ifndef FASTQ_TOOLS_SW_H
+#define FASTQ_TOOLS_SW_H
+
+
+typedef struct
+{
+ unsigned char* subject;
+ int size;
+
+ int d[25]; /* cost matrix */
+ int gap_open; /* gap open */
+ int gap_extend; /* gap extend */
+
+ /* matrix rows, used internally */
+ int* work0;
+ int* work1;
+
+} sw_t;
+
+/* convert a n ASCII nucleotide sequence to one suitable for fastq_sw */
+void fastq_sw_conv_seq(unsigned char*, int n);
+
+sw_t* fastq_alloc_sw(const unsigned char *subject, int size);
+void fastq_free_sw(sw_t*);
+
+int fastq_sw(sw_t*, const unsigned char* query, int size);
+
+#endif
+
+++ /dev/null
-
- Copyright 2006, by Michael Farrar. All rights reserved. The SWSSE2
- program and documentation may not be sold or incorporated into a
- commercial product, in whole or in part, without written consent of
- Michael Farrar.
-
- For further information regarding permission for use or reproduction,
- please contact Michael Farrar at:
-
- farrar.michael@gmail.com
-
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+++ /dev/null
-
-noinst_LTLIBRARIES = libswsse2.la
-
-libswsse2_la_SOURCES = fastalib.h fastalib.c \
- matrix.h matrix.c \
- swscalar.h swscalar.c \
- swsse2.h swsse2.c \
- swstriped.h swstriped.c \
- swwozniak.h swwozniak.c \
- blosum62.h blosum62.c
-
-
-libswsse2_la_CFLAGS = -msse2
-
-EXTRADIST = README COPYRIGHT
-
+++ /dev/null
-Introduction
-
-The Smith-Waterman [1] algorithm is one of the most sensitive sequencing
-algorithms in use today. It is also the slowest due to the number of
-calculations needed to perform the search. To speed up the algorithm, it
-has been adapted to use Single Instruction Multiple Data, SIMD, instructions
-found on many common microprocessors today. SIMD instructions are able to
-perform the same operation on multiple pieces of data parallel.
-
-The program swsse2 introduces a new SIMD implementation of the Smith-Waterman
-algorithm for the X86 processor. The weights are precomputed parallel to the
-query sequence, like the Rognes [2] implementation, but are accessed in the
-striped pattern. The new implementation reached speeds six times faster than
-other SIMD implementations.
-
-Below is a graph comparing the total search times of 11 queries, 3806 residues,
-against the Swiss-Prot 49.1 database, 75,841,138 residues. The tests were run
-on a PC with a 2.00GHz Intel Xeon Core 2 Duo processor with 2 GB RAM. The
-program is singlely threaded, so the number of cores has no affect on the run
-times. The Wozniak, Rognes and striped implementations were run with the
-scoring matrices BLOSUM50 and BLOSUM62 and four different gap penalties, 10-k,
-10-2k, 14-2k and 40-2k. Since the Wozniak's runtime does not change depending
-on the scoring matrix, one line is used for both scoring matrices.
-
-Build Instructions
-
- * Download the zip file with the swsse2 sources.
- * Unzip the sources.
- * Load the swsse2.vcproj file into Microsoft Visual C++ 2005.
- * Build the project (F7). For optimized code, be sure to change the
- configuration to a Release build.
- * The swsse2.exe file is in the Release directory ready to be run.
-
-Running
-
-To run swsse2 three files must be provided, the scoring matrix, query sequence
-and the database sequence. Four scoring matrices are provided with the
-release, BLOSUM45, BLOSUM50, BLOSUM62 and BLOSUM80. The query sequence and
-database sequence must be in the FASTA format. For example, to run with the
-default gap penalties 10-2k, the scoring matrix BLOSUM50, the query sequence
-ptest1.fasta and the sequence database db.fasta use:
-
- c:\swsse2>.\Release\swsse2.exe blosum50.mat ptest1.fasta db.fasta
- ptest1.fasta vs db.fasta
- Matrix: blosum50.mat, Init: -10, Ext: -2
-
- Score Description
- 53 108_LYCES Protein 108 precursor.
- 53 10KD_VIGUN 10 kDa protein precursor (Clone PSAS10).
- 32 1431_ECHGR 14-3-3 protein homolog 1.
- 32 1431_ECHMU 14-3-3 protein homolog 1 (Emma14-3-3.1).
- 27 110K_PLAKN 110 kDa antigen (PK110) (Fragment).
- 26 1432_ECHGR 14-3-3 protein homolog 2.
- 25 13S1_FAGES 13S globulin seed storage protein 1
- 25 13S3_FAGES 13S globulin seed storage protein 3
- 25 13S2_FAGES 13S globulin seed storage protein 2
- 23 12S1_ARATH 12S seed storage protein CRA1
- 22 13SB_FAGES 13S globulin basic chain.
- 21 12AH_CLOS4 12-alpha-hydroxysteroid dehydrogenase
- 21 140U_DROME RPII140-upstream protein.
- 21 12S2_ARATH 12S seed storage protein CRB
- 21 1431_LYCES 14-3-3 protein 1.
- 20 1431_ARATH 14-3-3-like protein GF14
-
- 21 residues in query string
- 2014 residues in 25 library sequences
- Scan time: 0.000 (Striped implementation)
-
-Options
-
- Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] [-c num]
- matrix query db
-
- -h : this help message
- -n : run a non-vectorized Smith-Waterman search
- -w : run a vectorized Wozniak search
- -r : run a vectorized Rognes search (NOT SUPPORTED)
- -s : run a vectorized striped search (default)
- -i num : gap init penalty (default -10)
- -e num : gap extension penalty (default -2)
- -t num : minimum score threshold (default 20)
- -c num : number of scores to be displayed (default 250)
- matrix : scoring matrix file
- query : query sequence file (fasta format)
- db : sequence database file (fasta format)
-
-Note
-
-The Rognes implementation is not released as part of the swsse2 package due to
-patent concerns.
-
-References
-
-[1] Smith, T. F. and Waterman, M. S. (1981) Identification of common molecular subsequences. J. Mol. Biol., 147, 195-197.
-
-[2] Rognes, T. and Seeberg, E. (2000) Six-fold speed-up of the Smith-Waterman sequence database searches using parallel processing on common microprocessors. Bioinformatics, 16, 699-706.
-
-
-
-
+++ /dev/null
-
-
-signed char blosum62[] =
-{
- 4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1,
- -2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1,
- 0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3,
- -2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1,
- -1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4,
- -2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3,
- 0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2,
- -2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0,
- -1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3,
- -1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1,
- -1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3,
- -1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1,
- -2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0,
- -1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1,
- -1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3,
- -1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0,
- 1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0,
- 0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1,
- 0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2,
- -3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3,
- 0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1,
- -2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2,
- -1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4
-};
-
-
-
-
+++ /dev/null
-
-#ifndef BLOSUM62_H_
-#define BLOSUM62_H_
-
-extern signed char blosum62[];
-
-#endif
-
-
+++ /dev/null
-/******************************************************************
- Copyright 2006 by Michael Farrar. All rights reserved.
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar. For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-
-/*
- Written by Michael Farrar, 2006.
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-
-#include "swsse2.h"
-#include "fastalib.h"
-
-#define READ_BUFFER_SIZE (128 * 1024)
-#define SEQ_NAME_SIZE (128)
-
-FASTA_LIB *openLib (char *file, int pad)
-{
- FILE *fp;
-
- FASTA_LIB *lib;
-
- if ((fp = fopen (file, "r")) == NULL) {
- fprintf (stderr, "Unable to open file %s\n", file);
- exit (-1);
- }
-
- lib = (FASTA_LIB *) malloc (sizeof (FASTA_LIB));
- if (!lib) {
- fprintf (stderr, "Unable to allocate memory for library header\n");
- exit (-1);
- }
-
- lib->readBuffer = (char *) malloc (READ_BUFFER_SIZE);
- if (!lib->readBuffer) {
- fprintf (stderr, "Unable to allocate memory for read buffer\n");
- exit (-1);
- }
-
- lib->seqBuffer = (unsigned char *) malloc (MAX_SEQ_LENGTH);
- if (!lib->seqBuffer) {
- fprintf (stderr, "Unable to allocate memory for sequence\n");
- exit (-1);
- }
-
- lib->seqName = (char *) malloc (SEQ_NAME_SIZE);
- if (!lib->seqName) {
- fprintf (stderr, "Unable to allocate memory for sequence name\n");
- exit (-1);
- }
-
- lib->size = (int) fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp);
- if (lib->size == 0 && !feof (fp)) {
- fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp));
- exit (-1);
- }
-
- lib->pos = 0;
-
- lib->fp = fp;
-
- lib->sequences = 0;
- lib->residues = 0;
-
- lib->pad;
-
- return lib;
-}
-
-static int
-readNextBlock (FASTA_LIB *lib)
-{
- FILE *fp = lib->fp;
- size_t size;
-
- size = fread (lib->readBuffer, sizeof (char), READ_BUFFER_SIZE, fp);
- if (lib->size == 0 && !feof (fp)) {
- fprintf (stderr, "Error (%d) reading fasta file\n", ferror (fp));
- exit (-1);
- }
-
- lib->pos = 0;
- lib->size = (int) size;
-
- return lib->size;
-}
-
-unsigned char *
-nextSeq (FASTA_LIB *lib, int *length)
-{
- int inx;
- int size;
- int done;
- int len;
-
- char *name = lib->seqName;
- unsigned char *seq = lib->seqBuffer;
-
- /* check if we are at the end of the library */
- if (lib->size == 0) {
- *length = 0;
- return NULL;
- }
-
- if (lib->pos == lib->size) {
- readNextBlock (lib);
- }
-
- inx = lib->pos;
-
- /* check for the start of a sequence */
- if (lib->readBuffer[inx] != '>') {
- fprintf (stderr, "Error parsing fasta file expecting > found %c\n",
- lib->readBuffer[inx]);
- exit (-1);
- }
-
- ++inx;
-
- /* read in the sequence name */
- len = 0;
- done = 0;
- do {
- if (inx >= lib->size) {
- size = readNextBlock (lib);
- if (size == 0) {
- *length = 0;
- return NULL;
- }
- inx = lib->pos;
- } else if (lib->readBuffer[inx] == '\n') {
- *name = '\0';
- done = 1;
- } else if (len < SEQ_NAME_SIZE - 1) {
- *name++ = lib->readBuffer[inx];
- len++;
- }
- ++inx;
- } while (!done);
-
- lib->pos = inx;
-
- /* read in the sequence */
- len = 0;
- done = 0;
- do {
- if (inx >= lib->size) {
- size = readNextBlock (lib);
- if (size == 0) {
- *seq = '\0';
- done = 1;
- }
- inx = 0;
- } else if (isspace(lib->readBuffer[inx])) {
- ++inx;
- } else if (lib->readBuffer[inx] == '>') {
- *seq = '\0';
- done = 1;
- } else if (len >= MAX_SEQ_LENGTH) {
- fprintf (stderr, "Sequence %s exceeds maximum length\n",
- lib->seqName);
- exit (-1);
- } else {
- int value = AMINO_ACID_VALUE[lib->readBuffer[inx]];
- if (value == -1) {
- fprintf (stderr, "Unknown amino acid %c in sequence %s\n",
- lib->readBuffer[inx], lib->seqName);
- exit (-1);
- }
- *seq++ = (char) value;
- inx++;
- len++;
- }
- } while (!done);
-
- lib->pos = inx;
- *length = len;
-
- lib->sequences++;
- lib->residues += len;
-
- /* check if we need to pad the sequence to a multiple of 16 */
- if (lib->pad) {
- inx = 16 - (len % 16);
- while (inx--) {
- *seq++ = ALPHA_SIZE;
- }
- *seq = '\0';
- }
-
- return lib->seqBuffer;
-}
-
-void closeLib (FASTA_LIB *lib)
-{
- fclose (lib->fp);
-
- free (lib->readBuffer);
- free (lib->seqBuffer);
- free (lib->seqName);
-
- free (lib);
-}
+++ /dev/null
-/******************************************************************
- Copyright 2006 by Michael Farrar. All rights reserved.
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar. For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-
-/*
- Written by Michael Farrar, 2006.
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-
-#ifndef INCLUDE_FASTALIB_H
-#define INCLUDE_FASTALIB_H
-
-#include <stdio.h>
-
-#define MAX_SEQ_LENGTH (64 * 1024)
-
-typedef struct {
- char *readBuffer;
-
- char *seqName;
- unsigned char *seqBuffer;
-
- int pos;
- int size;
-
- FILE *fp;
-
- int sequences;
- int residues;
-
- int pad;
-} FASTA_LIB;
-
-FASTA_LIB *openLib (char *file, int pad);
-void closeLib (FASTA_LIB *lib);
-
-unsigned char *nextSeq (FASTA_LIB *lib, int *length);
-
-#define seqName(LIB) (LIB->seqName)
-
-#endif /* INCLUDE_FASTALIB_H */
+++ /dev/null
-/******************************************************************
- Copyright 2006 by Michael Farrar. All rights reserved.
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar. For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-
-/*
- Written by Michael Farrar, 2006.
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-
-#include "swsse2.h"
-#include "matrix.h"
-
-#define BUF_SIZE 512
-
-char *skipSpaces (char *line)
-{
- while (isspace (*line)) {
- ++line;
- }
-
- return line;
-}
-
-signed char *readMatrix (char *file)
-{
- FILE *fp;
- char line[BUF_SIZE];
-
- signed char *matrix;
-
- int mark[ALPHA_SIZE];
- int order[ALPHA_SIZE];
-
- int done;
- int i;
-
- int errors = 0;
- int count = 0;
-
- if ((fp = fopen (file, "r")) == NULL) {
- fprintf (stderr, "Unable to open file %s\n", file);
- exit (-1);
- }
-
- matrix = (signed char *) malloc (ALPHA_SIZE * ALPHA_SIZE);
- if (!matrix) {
- fprintf (stderr, "Unable to allocate memory for scoring matrix\n");
- exit (-1);
- }
-
- /* initialize the order and mark arrays */
- for (i = 0; i < ALPHA_SIZE; ++i) {
- order[i] = -1;
- mark[i] = -1;
- }
-
- /* read the first line of the matrix giving the amino acid order */
- done = 0;
- while (!done && fgets (line, BUF_SIZE, fp) != NULL) {
- char *ptr = skipSpaces (line);
- if (*ptr && *ptr != '#') {
-
- while (*ptr && *ptr != '#') {
- int inx = AMINO_ACID_VALUE[*ptr];
-
- if (inx == -1) {
- fprintf (stderr, "Unknown amino acid %c in %s\n", *ptr, file);
- ++errors;
- } else if (mark[inx] != -1) {
- fprintf (stderr, "Amino acid %c defined twice\n", *ptr);
- ++errors;
- } else if (count >= ALPHA_SIZE) {
- /* this should not happen, but we will be safe */
- fprintf (stderr, "Too many amino acids %d\n", count);
- ++errors;
- } else {
- order[count++] = inx;
- mark[inx] = inx;
- }
- ptr = skipSpaces (ptr + 1);
- }
-
- done = 1;
- }
- }
-
- /* make sure all amino acids are defined */
- for (i = 0; i < ALPHA_SIZE; ++i) {
- if (order[i] < 0) {
- fprintf (stderr, "Missing column for amino acid %c\n",
- AMINO_ACIDS[i]);
- ++errors;
- }
- mark[i] = -1;
- }
-
- if (errors > 0) {
- fprintf (stderr, "Terminating due to errors in matrix file\n");
- exit (-1);
- }
-
- /* read the scores for the amino acids */
- while (fgets (line, BUF_SIZE, fp) != NULL) {
- signed char *row;
- char *ptr = skipSpaces (line);
- if (*ptr && *ptr != '#') {
- char aminoAcid = *ptr;
- int inx = AMINO_ACID_VALUE[*ptr];
- if (inx == -1) {
- fprintf (stderr, "Unknown amino acid %c in matrix\n", *ptr);
- ++errors;
- } else if (mark[inx] != -1) {
- fprintf (stderr, "Row %c defined twice\n", *ptr);
- ++errors;
- }
-
- row = &matrix[inx * ALPHA_SIZE];
-
- for (i = 0; i < ALPHA_SIZE; ++i) {
- int sign = 1;
- int num = 0;
-
- ptr = skipSpaces (ptr + 1);
-
- /* check the sign */
- if (*ptr == '-') {
- sign = -1;
- ++ptr;
- }
-
- do {
- if (*ptr >= '0' && *ptr <= '9') {
- num = num * 10 + (*ptr - '0');
- ptr++;
- } else {
- char name[16];
- char *pName;
- if (isspace (*ptr)) {
- pName = "space";
- } else if (*ptr == 0) {
- pName = "end of line";
- } else {
- name[0] = *ptr;
- name[1] = 0;
- pName = name;
- }
- fprintf (stderr, "Row %c Expecting digit found %s\n",
- aminoAcid, pName);
- exit (-1);
- }
- } while (*ptr && !isspace (*ptr));
-
- num = num * sign;
-
- if (num < -128 || num > 127) {
- fprintf (stderr, "Weight %d out of range row %c\n",
- aminoAcid, num);
- ++errors;
- num = 0;
- }
-
- row[order[i]] = (char) num;
- }
-
- if (i < ALPHA_SIZE) {
- fprintf (stderr, "Amino acid row %c incomplete\n", aminoAcid);
- ++errors;
- }
-
- mark[inx] = 1;
- }
- }
-
- /* make sure all amino acids are defined */
- for (i = 0; i < ALPHA_SIZE; ++i) {
- if (mark[i] < 0) {
- fprintf (stderr, "Missing row for amino acid %c\n",
- AMINO_ACIDS[i]);
- ++errors;
- }
- }
-
- if (errors) {
- fprintf (stderr, "Terminating due to errors in matrix %s\n", file);
- }
-
- fclose (fp);
-
- return matrix;
-}
+++ /dev/null
-/******************************************************************
- Copyright 2006 by Michael Farrar. All rights reserved.
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar. For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-
-/*
- Written by Michael Farrar, 2006.
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-
-#ifndef INCLUDE_MATRIX_H
-#define INCLUDE_MATRIX_H
-
-signed char *readMatrix (char *file);
-
-#endif /* INCLUDE_MATRIX_H */
+++ /dev/null
-/******************************************************************\r
- Copyright 2006 by Michael Farrar. All rights reserved.\r
- This program may not be sold or incorporated into a commercial product,\r
- in whole or in part, without written consent of Michael Farrar. For \r
- further information regarding permission for use or reproduction, please \r
- contact: Michael Farrar at farrar.michael@gmail.com.\r
-*******************************************************************/\r
-\r
-/*\r
- Written by Michael Farrar, 2006.\r
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
-*/\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-\r
-#include "swsse2.h"\r
-#include "swscalar.h"\r
-\r
-typedef struct {\r
- int *pMatrix;\r
- int *pH;\r
- int *pE;\r
- int *pData;\r
-} SwScalarData;\r
-\r
-int\r
-swScalar (unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- int *pMatrix,\r
- int *pH,\r
- int *pE);\r
-\r
-void *\r
-swScalarInit(unsigned char *querySeq,\r
- int queryLength,\r
- signed char *matrix)\r
-{\r
- int i, j;\r
- int nCount;\r
-\r
- int *pi;\r
-\r
- SwScalarData *pSwData;\r
- \r
- /* remove unreferenced warnings */\r
- querySeq;\r
-\r
- pSwData = (SwScalarData *) malloc (sizeof (SwScalarData));\r
- if (!pSwData) {\r
- fprintf (stderr, "Unable to allocate memory for SW data\n");\r
- exit (-1);\r
- }\r
-\r
- nCount = ALPHA_SIZE * ALPHA_SIZE + /* matrix */\r
- (queryLength * 3); /* vH1, vH2 and vE */\r
-\r
- pSwData->pData = (int *) calloc (nCount, sizeof (int));\r
- if (!pSwData->pData) {\r
- fprintf (stderr, "Unable to allocate memory for SW data buffers\n");\r
- exit (-1);\r
- }\r
-\r
- pSwData->pMatrix = pSwData->pData;\r
-\r
- pSwData->pH = pSwData->pMatrix + ALPHA_SIZE * ALPHA_SIZE;\r
- pSwData->pE = pSwData->pH + queryLength;\r
-\r
- /* Conver the scoring matrix to ints */\r
- pi = pSwData->pMatrix;\r
- for (i = 0; i < ALPHA_SIZE; ++i) {\r
- for (j = 0; j < ALPHA_SIZE; ++j) {\r
- *pi++ = (int) *matrix++;\r
- }\r
- }\r
-\r
- return pSwData;\r
-}\r
-\r
-void swScalarScan (unsigned char *querySeq,\r
- int queryLength,\r
- FASTA_LIB *dbLib,\r
- void *swData,\r
- SEARCH_OPTIONS *options,\r
- SCORE_LIST *scores)\r
-{\r
- int score;\r
-\r
- int threshold = options->threshold;\r
-\r
- unsigned char *dbSeq;\r
- int dbLen;\r
-\r
- int gapInit = -(options->gapInit + options->gapExt);\r
- int gapExt = -options->gapExt;\r
-\r
- SwScalarData *scalarData = (SwScalarData *) swData;\r
-\r
- dbSeq = nextSeq (dbLib, &dbLen);\r
- while (dbLen > 0) {\r
-\r
- score = swScalar (querySeq, queryLength, \r
- dbSeq, dbLen, \r
- gapInit, gapExt, \r
- scalarData->pMatrix,\r
- scalarData->pH,\r
- scalarData->pE);\r
-\r
- if (score >= threshold) {\r
- int minScore = insertList (scores, score, seqName (dbLib));\r
- if (minScore >= threshold) {\r
- threshold = minScore;\r
- }\r
- }\r
-\r
- dbSeq = nextSeq (dbLib, &dbLen);\r
- }\r
-}\r
-\r
-void\r
-swScalarComplete(void *pSwData)\r
-{\r
- SwScalarData *pScalarData = (SwScalarData *) pSwData;\r
-\r
- free (pScalarData->pData);\r
- free (pScalarData);\r
-}\r
-\r
-int\r
-swScalar(unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- int *pMatrix,\r
- int *pH,\r
- int *pE)\r
-{\r
- int i, j;\r
-\r
- int E, F, H;\r
- int prevH;\r
-\r
- int maxScore = 0;\r
-\r
- int *pScore;\r
-\r
- /* Zero out the storage vector */\r
- for (i = 0; i < queryLength; i++)\r
- {\r
- *(pE + i) = 0;\r
- *(pH + i) = 0;\r
- }\r
-\r
- for (i = 0; i < dbLength; ++i)\r
- {\r
- /* fetch first data asap. */\r
- pScore = pMatrix + dbSeq[i] * ALPHA_SIZE;\r
-\r
- /* zero out F. */\r
- F = 0;\r
-\r
- /* load the next h value */\r
- prevH = 0;\r
-\r
- for (j = 0; j < queryLength; j++)\r
- {\r
- /* load E values */\r
- E = *(pE + j);\r
-\r
- /* add score to H */\r
- H = prevH + *(pScore + querySeq[j]);\r
- if (H < 0) {\r
- H = 0;\r
- }\r
-\r
- /* Update highest score encountered this far */\r
- if (H > maxScore) {\r
- maxScore = H;\r
- }\r
-\r
- /* get max from H, E and F */\r
- if (E > H) {\r
- H = E;\r
- }\r
- if (F > H) {\r
- H = F;\r
- }\r
-\r
- /* save H values */\r
- prevH = *(pH + j);\r
- *(pH + j) = H;\r
-\r
- H = H - gapOpen;\r
-\r
- /* update E value */\r
- E = E - gapExtend;\r
- if (H > E) {\r
- E = H;\r
- }\r
- if (E < 0) {\r
- E = 0;\r
- }\r
-\r
- /* update vF value */\r
- F = F - gapExtend;\r
- if (H > F) {\r
- F = H;\r
- }\r
- if (F < 0) {\r
- F = 0;\r
- }\r
-\r
- /* save vE values */\r
- *(pE + j) = E;\r
- }\r
- }\r
-\r
- /* return largest score */\r
- return maxScore;\r
-}\r
+++ /dev/null
-/******************************************************************
- Copyright 2006 by Michael Farrar. All rights reserved.
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar. For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-
-/*
- Written by Michael Farrar, 2006.
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-
-#ifndef INCLUDE_SWSCALAR_H
-#define INCLUDE_SWSCALAR_H
-
-#include "swsse2.h"
-#include "fastalib.h"
-
-SW_DATA *
-swScalarInit (unsigned char *querySeq,
- int queryLength,
- signed char *matrix);
-
-
-void
-swScalarScan (unsigned char *querySeq,
- int queryLength,
- FASTA_LIB *dbLib,
- void *swData,
- SEARCH_OPTIONS *options,
- SCORE_LIST *scores);
-
-
-void
-swScalarComplete (SW_DATA *pSwData);
-
-
-#endif /* INCLUDE_SWSCALAR_H */
+++ /dev/null
-/******************************************************************\r
- Copyright 2006 by Michael Farrar. All rights reserved.\r
- This program may not be sold or incorporated into a commercial product,\r
- in whole or in part, without written consent of Michael Farrar. For \r
- further information regarding permission for use or reproduction, please \r
- contact: Michael Farrar at farrar.michael@gmail.com.\r
-*******************************************************************/\r
-\r
-/*\r
- Written by Michael Farrar, 2006.\r
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
-*/\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-\r
-#include <time.h>\r
-#include <sys/timeb.h>\r
-\r
-#include "swsse2.h"\r
-#include "matrix.h"\r
-#include "fastalib.h"\r
-\r
-#include "swscalar.h"\r
-#include "swwozniak.h"\r
-#ifdef WITH_ROGNES\r
-#include "swrognes.h"\r
-#endif\r
-#include "swstriped.h"\r
-\r
-typedef enum { SCALAR, \r
- WOZNIAK, \r
-#ifdef WITH_ROGNES\r
- ROGNES, \r
-#endif\r
- STRIPED \r
-} SW_TYPES;\r
-\r
-const char *SW_IMPLEMENATION[] = {\r
- "Non-optimized",\r
- "Wozniak",\r
-#ifdef WITH_ROGNES\r
- "Rognes",\r
-#endif\r
- "Striped",\r
-};\r
-\r
-typedef struct {\r
- SW_DATA *(*init) (unsigned char *querySeq,\r
- int queryLength,\r
- signed char *matrix);\r
- void (*scan) (unsigned char *querySeq,\r
- int queryLength,\r
- FASTA_LIB *dbLib,\r
- void *swData,\r
- SEARCH_OPTIONS *options,\r
- SCORE_LIST *scores);\r
- void (*done) (SW_DATA *pSwData);\r
-} SW_FUNCT_DEFS;\r
-\r
-SW_FUNCT_DEFS swFuncs[] = {\r
- {\r
- swScalarInit,\r
- swScalarScan,\r
- swScalarComplete,\r
- },\r
- {\r
- swWozniakInit,\r
- swWozniakScan,\r
- swWozniakComplete,\r
- },\r
-#ifdef WITH_ROGNES\r
- {\r
- swRognesInit,\r
- swRognesScan,\r
- swRognesComplete,\r
- },\r
-#endif\r
- {\r
- swStripedInit,\r
- swStripedScan,\r
- swStripedComplete,\r
- },\r
-};\r
-\r
-const char AMINO_ACIDS[ALPHA_SIZE] = {\r
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', \r
- 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', \r
- 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'\r
-};\r
-\r
-const int AMINO_ACID_VALUE[256] = {\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1,\r
- 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,\r
- -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1,\r
- 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\r
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1\r
-};\r
-\r
-SCORE_LIST *initList (int count);\r
-void freeList (SCORE_LIST *list);\r
-\r
-void printResults (SCORE_LIST *list);\r
-\r
-void printUsage (void)\r
-{\r
- printf ("Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] "\r
- "[-c num] matrix query db\n");\r
- printf (" -h : this help message\n");\r
- printf (" -n : run a non-vectorized Smith-Waterman search\n");\r
- printf (" -w : run a vectorized Wozniak search\n");\r
-#ifdef WITH_ROGNES\r
- printf (" -r : run a vectorized Rognes search\n");\r
-#else\r
- printf (" -r : run a vectorized Rognes search (NOT SUPPORTED)\n");\r
-#endif\r
- printf (" -s : run a vectorized striped search (default)\n");\r
- printf (" -i num : gap init penalty (default -10)\n");\r
- printf (" -e num : gap extension penalty (default -2)\n");\r
- printf (" -t num : minimum score threshold (default 20)\n");\r
- printf (" -c num : number of scores to be displayed (default 250)\n");\r
- printf (" matrix : scoring matrix file\n");\r
- printf (" query : query sequence file (fasta format)\n");\r
- printf (" db : sequence database file (fasta format)\n");\r
-}\r
-\r
-#if 0\r
-int main (int argc, char **argv)\r
-{\r
- int i;\r
-\r
- int penalty;\r
-\r
- int rptCount = 250;\r
-\r
- SW_TYPES swType = STRIPED;\r
-\r
- char *dbFile = NULL;\r
- char *queryFile = NULL;\r
- char *matrixFile = NULL;\r
-\r
- signed char *matrix;\r
-\r
- unsigned char *querySeq;\r
- int queryLen;\r
-\r
- SCORE_LIST *list;\r
-\r
- FASTA_LIB *queryLib;\r
- FASTA_LIB *dbLib;\r
-\r
- void *swData;\r
-\r
- struct _timeb startTime;\r
- struct _timeb endTime;\r
-\r
- double startTimeSec;\r
- double endTimeSec;\r
-\r
- SEARCH_OPTIONS options;\r
-\r
- if (argc < 4) {\r
- printUsage ();\r
- exit (-1);\r
- }\r
-\r
- /* set the default options */\r
- options.gapInit = -10;\r
- options.gapExt = -2;\r
- options.threshold = 20;\r
-\r
- i = 1;\r
- while (i < argc) {\r
- if (i + 3 == argc) {\r
- /* should be matrix file name */\r
- matrixFile = argv[i];\r
-\r
- } else if (i + 2 == argc) {\r
- /* should be query file name */\r
- queryFile = argv[i];\r
-\r
- } else if (i + 1 == argc) {\r
- /* should be matrix file name */\r
- dbFile = argv[i];\r
-\r
- } else {\r
- /* process arguements */\r
- switch (argv[i][1]) {\r
- case 'h':\r
- printUsage ();\r
- break;\r
- case 'n':\r
- swType = SCALAR;\r
- break;\r
- case 'r':\r
-#ifdef WITH_ROGNES\r
- swType = ROGNES;\r
-#else\r
- fprintf (stderr, "The ROGNES implementation is not supported\n");\r
- exit (-1);\r
-#endif\r
- break;\r
- case 'w':\r
- swType = WOZNIAK;\r
- break;\r
- case 's':\r
- swType = STRIPED;\r
- break;\r
- case 'i':\r
- penalty = atoi (argv[++i]);\r
- if (penalty > 0 || penalty < -128) {\r
- fprintf (stderr, "Invalid gap init %d\n", penalty);\r
- fprintf (stderr, "Valid range is 0 - -128\n");\r
- exit (-1);\r
- }\r
- options.gapInit = (unsigned short) penalty;\r
- break;\r
- case 'e':\r
- penalty = atoi (argv[++i]);\r
- if (penalty > 0 || penalty < -128) {\r
- fprintf (stderr, "Invalid gap extension %d\n", penalty);\r
- fprintf (stderr, "Valid range is 0 - -128\n");\r
- exit (-1);\r
- }\r
- options.gapExt = (unsigned short) penalty;\r
- break;\r
- case 't':\r
- options.threshold = atoi (argv[++i]);\r
- break;\r
- case 'c':\r
- rptCount = atoi (argv[++i]);\r
- if (rptCount < 10) {\r
- rptCount = 10;\r
- }\r
- break;\r
- default:\r
- fprintf (stderr, "Invalid option %s\n", argv[i]);\r
- printUsage ();\r
- exit (-1);\r
- }\r
- }\r
- ++i;\r
- }\r
-\r
- list = initList (rptCount);\r
-\r
- matrix = readMatrix (matrixFile);\r
- if (matrix == NULL) {\r
- fprintf (stderr, "Error reading matrix\n");\r
- exit (-1);\r
- }\r
-\r
- dbLib = openLib (dbFile, swType == WOZNIAK);\r
- queryLib = openLib (queryFile, 0);\r
-\r
- querySeq = nextSeq (queryLib, &queryLen);\r
- if (queryLen == 0) {\r
- fprintf (stderr, "Empty query sequence\n");\r
- exit (-1);\r
- }\r
-\r
- printf ("%s vs %s\n", queryFile, dbFile);\r
- printf ("Matrix: %s, Init: %d, Ext: %d\n\n", \r
- matrixFile, options.gapInit, options.gapExt);\r
-\r
- _ftime(&startTime);\r
-\r
- swData = (swFuncs[swType].init) (querySeq, queryLen, matrix);\r
-\r
- (swFuncs[swType].scan) (querySeq, queryLen, dbLib, swData, &options, list);\r
-\r
- (swFuncs[swType].done) (swData);\r
-\r
- _ftime(&endTime);\r
-\r
- printResults (list);\r
-\r
- printf ("\n");\r
- printf ("%d residues in query string\n", queryLen);\r
- printf ("%d residues in %d library sequences\n", \r
- dbLib->residues, dbLib->sequences);\r
-\r
- startTimeSec = startTime.time + startTime.millitm / 1000.0;\r
- endTimeSec = endTime.time + endTime.millitm / 1000.0;\r
- printf ("Scan time: %6.3f (%s implementation)\n", \r
- endTimeSec - startTimeSec,\r
- SW_IMPLEMENATION[swType]);\r
- \r
- closeLib (queryLib);\r
- closeLib (dbLib);\r
-\r
- free (matrix);\r
-\r
- freeList (list);\r
-}\r
-#endif\r
-\r
-SCORE_LIST *\r
-initList (int count)\r
-{\r
- int i;\r
-\r
- SCORE_LIST *hdr;\r
- SCORE_NODE *list;\r
- SCORE_NODE *prev;\r
-\r
- hdr = (SCORE_LIST *) malloc (sizeof (SCORE_LIST));\r
- if (hdr == NULL) {\r
- fprintf (stderr, "Cannot allocate storage for score header\n");\r
- exit (-1);\r
- }\r
-\r
- list = (SCORE_NODE *) calloc (count, sizeof (SCORE_NODE));\r
- if (list == NULL) {\r
- fprintf (stderr, "Cannot allocate storage for scores\n");\r
- exit (-1);\r
- }\r
-\r
- /* initialize the scores list */\r
- hdr->minScore = 0;\r
- hdr->first = NULL;\r
- hdr->last = NULL;\r
- hdr->free = list;\r
- hdr->buffer = list;\r
-\r
- prev = NULL;\r
- for (i = 0; i < count; ++i) {\r
- list[i].name[0] = '\0';\r
- list[i].score = 0;\r
-\r
- if (i == 0) {\r
- list[i].prev = NULL;\r
- } else {\r
- list[i].prev = &list[i-1];\r
- }\r
-\r
- if (i == count - 1) {\r
- list[i].next = NULL;\r
- } else {\r
- list[i].next = &list[i+1];\r
- }\r
- }\r
-\r
- return hdr;\r
-}\r
-\r
-void freeList (SCORE_LIST *list)\r
-{\r
- free (list->buffer);\r
- free (list);\r
-}\r
-\r
-int insertList (SCORE_LIST *list, int score, char *name)\r
-{\r
- SCORE_NODE *node;\r
- SCORE_NODE *ptr = list->first;\r
-\r
- if (list->free != NULL) {\r
- node = list->free;\r
- list->free = list->free->next;\r
- } else if (score > list->last->score) {\r
- node = list->last;\r
- list->last = node->prev;\r
- list->last->next = NULL;\r
- } else {\r
- /* should never happen */\r
- return list->minScore + 1;\r
- }\r
-\r
- strncpy (node->name, name, MAX_SCORE_NAME);\r
- node->name[MAX_SCORE_NAME - 1] = '\0';\r
- node->score = score;\r
-\r
- while (ptr && ptr->score >= score) {\r
- ptr = ptr->next;\r
- }\r
-\r
- if (list->first == NULL) {\r
- list->first = node;\r
- list->last = node;\r
- node->prev = NULL;\r
- node->next = NULL;\r
- } else if (ptr == NULL) {\r
- node->prev = list->last;\r
- node->next = NULL;\r
- node->prev->next = node;\r
- list->last = node;\r
- } else {\r
- node->prev = ptr->prev;\r
- node->next = ptr;\r
-\r
- if (node->prev == NULL) {\r
- list->first = node;\r
- } else {\r
- node->prev->next = node;\r
- }\r
- ptr->prev = node;\r
- }\r
-\r
- if (list->free == NULL) {\r
- list->minScore = list->last->score + 1;\r
- }\r
-\r
- return list->minScore;\r
-}\r
-\r
-void printResults (SCORE_LIST *list)\r
-{\r
- SCORE_NODE *ptr = list->first;\r
-\r
- printf ("Score Description\n");\r
-\r
- while (ptr) {\r
- printf ("%5d %s\n", ptr->score, ptr->name);\r
- ptr = ptr->next;\r
- }\r
-}\r
-\r
+++ /dev/null
-/******************************************************************\r
- Copyright 2006 by Michael Farrar. All rights reserved.\r
- This program may not be sold or incorporated into a commercial product,\r
- in whole or in part, without written consent of Michael Farrar. For \r
- further information regarding permission for use or reproduction, please \r
- contact: Michael Farrar at farrar.michael@gmail.com.\r
-*******************************************************************/\r
-\r
-/*\r
- Written by Michael Farrar, 2006.\r
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
-*/\r
-\r
-#ifndef INCLUDE_SWSSE2_H\r
-#define INCLUDE_SWSSE2_H\r
-\r
-typedef void SW_DATA;\r
-\r
-#define ALPHA_SIZE 23\r
-\r
-extern const char AMINO_ACIDS[ALPHA_SIZE];\r
-extern const int AMINO_ACID_VALUE[256];\r
-\r
-#define SHORT_BIAS 32768\r
-\r
-typedef struct {\r
- short gapInit;\r
- short gapExt;\r
- int threshold;\r
-} SEARCH_OPTIONS;\r
-\r
-#define MAX_SCORE_NAME 64\r
-typedef struct SCORE_STRUCT {\r
- int score;\r
- char name[MAX_SCORE_NAME];\r
- struct SCORE_STRUCT *prev;\r
- struct SCORE_STRUCT *next;\r
-} SCORE_NODE;\r
-\r
-typedef struct {\r
- int minScore;\r
- SCORE_NODE *first;\r
- SCORE_NODE *last;\r
- SCORE_NODE *free;\r
- void *buffer;\r
-} SCORE_LIST;\r
-\r
-int insertList (SCORE_LIST *list, int score, char *name);\r
-\r
-#endif /* INCLUDE_SWSSE2_H */\r
+++ /dev/null
-/******************************************************************\r
- Copyright 2006 by Michael Farrar. All rights reserved.\r
- This program may not be sold or incorporated into a commercial product,\r
- in whole or in part, without written consent of Michael Farrar. For \r
- further information regarding permission for use or reproduction, please \r
- contact: Michael Farrar at farrar.michael@gmail.com.\r
-*******************************************************************/\r
-\r
-/*\r
- Written by Michael Farrar, 2006.\r
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
-*/\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <emmintrin.h>\r
-\r
-#include "swsse2.h"\r
-#include "swstriped.h"\r
-\r
-void *\r
-swStripedInit(unsigned char *querySeq,\r
- int queryLength,\r
- signed char *matrix)\r
-{\r
- int i, j, k;\r
-\r
- int segSize;\r
- int nCount;\r
-\r
- int bias;\r
-\r
- int lenQryByte;\r
- int lenQryShort;\r
-\r
- int weight;\r
-\r
- short *ps;\r
- char *pc;\r
-\r
- signed char *matrixRow;\r
-\r
- size_t aligned;\r
-\r
- SwStripedData *pSwData;\r
- \r
- lenQryByte = (queryLength + 15) / 16;\r
- lenQryShort = (queryLength + 7) / 8;\r
-\r
- pSwData = (SwStripedData *) malloc (sizeof (SwStripedData));\r
- if (!pSwData) {\r
- fprintf (stderr, "Unable to allocate memory for SW data\n");\r
- exit (-1);\r
- }\r
-\r
- nCount = 64 + /* slack bytes */\r
- lenQryByte * ALPHA_SIZE + /* query profile byte */\r
- lenQryShort * ALPHA_SIZE + /* query profile short */\r
- (lenQryShort * 3); /* vH1, vH2 and vE */\r
-\r
- pSwData->pData = (unsigned char *) calloc (nCount, sizeof (__m128i));\r
- if (!pSwData->pData) {\r
- fprintf (stderr, "Unable to allocate memory for SW data buffers\n");\r
- exit (-1);\r
- }\r
-\r
- /* since we might port this to another platform, lets align the data */\r
- /* to 16 byte boundries ourselves */\r
- aligned = ((size_t) pSwData->pData + 15) & ~(0x0f);\r
-\r
- pSwData->pvbQueryProf = (__m128i *) aligned;\r
- pSwData->pvsQueryProf = pSwData->pvbQueryProf + lenQryByte * ALPHA_SIZE;\r
-\r
- pSwData->pvH1 = pSwData->pvsQueryProf + lenQryShort * ALPHA_SIZE;\r
- pSwData->pvH2 = pSwData->pvH1 + lenQryShort;\r
- pSwData->pvE = pSwData->pvH2 + lenQryShort;\r
-\r
- /* Use a scoring profile for the SSE2 implementation, but the layout\r
- * is a bit strange. The scoring profile is parallel to the query, but is\r
- * accessed in a stripped pattern. The query is divided into equal length\r
- * segments. The number of segments is equal to the number of elements\r
- * processed in the SSE2 register. For 8-bit calculations, the query will\r
- * be divided into 16 equal length parts. If the query is not long enough\r
- * to fill the last segment, it will be filled with neutral weights. The\r
- * first element in the SSE register will hold a value from the first segment,\r
- * the second element of the SSE register will hold a value from the\r
- * second segment and so on. So if the query length is 288, then each\r
- * segment will have a length of 18. So the first 16 bytes will have\r
- * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will\r
- * have the following weights: Q2, Q20, Q38, ... Q272; and so on until\r
- * all parts of all segments have been written. The last seqment will\r
- * have the following weights: Q18, Q36, Q54, ... Q288. This will be\r
- * done for the entire alphabet.\r
- */\r
-\r
- /* Find the bias to use in the substitution matrix */\r
- bias = 127;\r
- for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) {\r
- if (matrix[i] < bias) {\r
- bias = matrix[i];\r
- }\r
- }\r
- if (bias > 0) {\r
- bias = 0;\r
- }\r
-\r
- /* Fill in the byte query profile */\r
- pc = (char *) pSwData->pvbQueryProf;\r
- segSize = (queryLength + 15) / 16;\r
- nCount = segSize * 16;\r
- for (i = 0; i < ALPHA_SIZE; ++i) {\r
- matrixRow = matrix + i * ALPHA_SIZE;\r
- for (j = 0; j < segSize; ++j) {\r
- for (k = j; k < nCount; k += segSize) {\r
- if (k >= queryLength) {\r
- weight = 0;\r
- } else {\r
- weight = matrixRow[*(querySeq + k)];\r
- }\r
- *pc++ = (char) (weight - bias);\r
- }\r
- }\r
- }\r
-\r
- /* Fill in the short query profile */\r
- ps = (short *) pSwData->pvsQueryProf;\r
- segSize = (queryLength + 7) / 8;\r
- nCount = segSize * 8;\r
- for (i = 0; i < ALPHA_SIZE; ++i) {\r
- matrixRow = matrix + i * ALPHA_SIZE;\r
- for (j = 0; j < segSize; ++j) {\r
- for (k = j; k < nCount; k += segSize) {\r
- if (k >= queryLength) {\r
- weight = 0;\r
- } else {\r
- weight = matrixRow[*(querySeq + k)];\r
- }\r
- *ps++ = (unsigned short) weight;\r
- }\r
- }\r
- }\r
-\r
- pSwData->bias = (unsigned short) -bias;\r
-\r
- return pSwData;\r
-}\r
-\r
-void swStripedScan (unsigned char *querySeq,\r
- int queryLength,\r
- FASTA_LIB *dbLib,\r
- void *swData,\r
- SEARCH_OPTIONS *options,\r
- SCORE_LIST *scores)\r
-{\r
- int score;\r
-\r
- int threshold = options->threshold;\r
-\r
- unsigned char *dbSeq;\r
- int dbLen;\r
-\r
- int gapInit = -(options->gapInit + options->gapExt);\r
- int gapExt = -options->gapExt;\r
-\r
- SwStripedData *stripedData = (SwStripedData *) swData;\r
-\r
- dbSeq = nextSeq (dbLib, &dbLen);\r
- while (dbLen > 0) {\r
-\r
- score = swStripedByte (querySeq, queryLength, \r
- dbSeq, dbLen, \r
- gapInit, gapExt, \r
- stripedData->pvbQueryProf,\r
- stripedData->pvH1,\r
- stripedData->pvH2,\r
- stripedData->pvE,\r
- stripedData->bias);\r
-\r
- /* check if needs a run with higher precision */\r
- if (score >= 255) {\r
- score = swStripedWord (querySeq, queryLength, \r
- dbSeq, dbLen, \r
- gapInit, gapExt, \r
- stripedData->pvsQueryProf,\r
- stripedData->pvH1,\r
- stripedData->pvH2,\r
- stripedData->pvE);\r
- }\r
-\r
- if (score >= threshold) {\r
- int minScore = insertList (scores, score, seqName (dbLib));\r
- if (minScore >= threshold) {\r
- threshold = minScore;\r
- }\r
- }\r
-\r
- dbSeq = nextSeq (dbLib, &dbLen);\r
- }\r
-}\r
-\r
-void\r
-swStripedComplete(void *pSwData)\r
-{\r
- SwStripedData *pStripedData = (SwStripedData *) pSwData;\r
-\r
- free (pStripedData->pData);\r
- free (pStripedData);\r
-}\r
-\r
-int\r
-swStripedWord(unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- __m128i *pvQueryProf,\r
- __m128i *pvHLoad,\r
- __m128i *pvHStore,\r
- __m128i *pvE)\r
-{\r
- int i, j;\r
- int score;\r
-\r
- int cmp;\r
- int iter = (queryLength + 7) / 8;\r
- \r
- __m128i *pv;\r
-\r
- __m128i vE, vF, vH;\r
-\r
- __m128i vMaxScore;\r
- __m128i vGapOpen;\r
- __m128i vGapExtend;\r
-\r
- __m128i vMin;\r
- __m128i vMinimums;\r
- __m128i vTemp;\r
-\r
- __m128i *pvScore;\r
-\r
- /* remove unreferenced warning */\r
- querySeq;\r
-\r
- /* Load gap opening penalty to all elements of a constant */\r
- vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0);\r
- vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
- vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
-\r
- /* Load gap extension penalty to all elements of a constant */\r
- vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0);\r
- vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
- vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
-\r
- /* load vMaxScore with the zeros. since we are using signed */\r
- /* math, we will bias the maxscore to -32768 so we have the */\r
- /* full range of the short. */\r
- vMaxScore = _mm_cmpeq_epi16 (vMaxScore, vMaxScore);\r
- vMaxScore = _mm_slli_epi16 (vMaxScore, 15);\r
-\r
- vMinimums = _mm_shuffle_epi32 (vMaxScore, 0);\r
-\r
- vMin = _mm_shuffle_epi32 (vMaxScore, 0);\r
- vMin = _mm_srli_si128 (vMin, 14);\r
-\r
- /* Zero out the storage vector */\r
- for (i = 0; i < iter; i++)\r
- {\r
- _mm_store_si128 (pvE + i, vMaxScore);\r
- _mm_store_si128 (pvHStore + i, vMaxScore);\r
- }\r
-\r
- for (i = 0; i < dbLength; ++i)\r
- {\r
- /* fetch first data asap. */\r
- pvScore = pvQueryProf + dbSeq[i] * iter;\r
-\r
- /* zero out F. */\r
- vF = _mm_cmpeq_epi16 (vF, vF);\r
- vF = _mm_slli_epi16 (vF, 15);\r
-\r
- /* load the next h value */\r
- vH = _mm_load_si128 (pvHStore + iter - 1);\r
- vH = _mm_slli_si128 (vH, 2);\r
- vH = _mm_or_si128 (vH, vMin);\r
-\r
- pv = pvHLoad;\r
- pvHLoad = pvHStore;\r
- pvHStore = pv;\r
-\r
- for (j = 0; j < iter; j++)\r
- {\r
- /* load values of vF and vH from previous row (one unit up) */\r
- vE = _mm_load_si128 (pvE + j);\r
-\r
- /* add score to vH */\r
- vH = _mm_adds_epi16 (vH, *pvScore++);\r
-\r
- /* Update highest score encountered this far */\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vH);\r
-\r
- /* get max from vH, vE and vF */\r
- vH = _mm_max_epi16 (vH, vE);\r
- vH = _mm_max_epi16 (vH, vF);\r
-\r
- /* save vH values */\r
- _mm_store_si128 (pvHStore + j, vH);\r
-\r
- /* update vE value */\r
- vH = _mm_subs_epi16 (vH, vGapOpen);\r
- vE = _mm_subs_epi16 (vE, vGapExtend);\r
- vE = _mm_max_epi16 (vE, vH);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epi16 (vF, vGapExtend);\r
- vF = _mm_max_epi16 (vF, vH);\r
-\r
- /* save vE values */\r
- _mm_store_si128 (pvE + j, vE);\r
-\r
- /* load the next h value */\r
- vH = _mm_load_si128 (pvHLoad + j);\r
- }\r
-\r
- /* reset pointers to the start of the saved data */\r
- j = 0;\r
- vH = _mm_load_si128 (pvHStore + j);\r
-\r
- /* the computed vF value is for the given column. since */\r
- /* we are at the end, we need to shift the vF value over */\r
- /* to the next column. */\r
- vF = _mm_slli_si128 (vF, 2);\r
- vF = _mm_or_si128 (vF, vMin);\r
- vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
- vTemp = _mm_cmpgt_epi16 (vF, vTemp);\r
- cmp = _mm_movemask_epi8 (vTemp);\r
- while (cmp != 0x0000) \r
- {\r
- vE = _mm_load_si128 (pvE + j);\r
-\r
- vH = _mm_max_epi16 (vH, vF);\r
-\r
- /* save vH values */\r
- _mm_store_si128 (pvHStore + j, vH);\r
-\r
- /* update vE incase the new vH value would change it */\r
- vH = _mm_subs_epi16 (vH, vGapOpen);\r
- vE = _mm_max_epi16 (vE, vH);\r
- _mm_store_si128 (pvE + j, vE);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epi16 (vF, vGapExtend);\r
-\r
- j++;\r
- if (j >= iter)\r
- {\r
- j = 0;\r
- vF = _mm_slli_si128 (vF, 2);\r
- vF = _mm_or_si128 (vF, vMin);\r
- }\r
-\r
- vH = _mm_load_si128 (pvHStore + j);\r
-\r
- vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
- vTemp = _mm_cmpgt_epi16 (vF, vTemp);\r
- cmp = _mm_movemask_epi8 (vTemp);\r
- }\r
- }\r
-\r
- /* find largest score in the vMaxScore vector */\r
- vTemp = _mm_srli_si128 (vMaxScore, 8);\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 4);\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 2);\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
-\r
- /* store in temporary variable */\r
- score = (short) _mm_extract_epi16 (vMaxScore, 0);\r
-\r
- /* return largest score */\r
- return score + SHORT_BIAS;\r
-}\r
-\r
-\r
-\r
-\r
-int\r
-swStripedByte(unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- __m128i *pvQueryProf,\r
- __m128i *pvHLoad,\r
- __m128i *pvHStore,\r
- __m128i *pvE,\r
- unsigned short bias)\r
-{\r
- int i, j;\r
- int score;\r
-\r
- int dup;\r
- int cmp;\r
- int iter = (queryLength + 15) / 16;\r
- \r
- __m128i *pv;\r
-\r
- __m128i vE, vF, vH;\r
-\r
- __m128i vMaxScore;\r
- __m128i vBias;\r
- __m128i vGapOpen;\r
- __m128i vGapExtend;\r
-\r
- __m128i vTemp;\r
- __m128i vZero;\r
-\r
- __m128i *pvScore;\r
-\r
- /* remove unreferenced warning */\r
- querySeq;\r
-\r
- /* Load the bias to all elements of a constant */\r
- dup = (bias << 8) | (bias & 0x00ff);\r
- vBias = _mm_insert_epi16 (vBias, dup, 0);\r
- vBias = _mm_shufflelo_epi16 (vBias, 0);\r
- vBias = _mm_shuffle_epi32 (vBias, 0);\r
-\r
- /* Load gap opening penalty to all elements of a constant */\r
- dup = (gapOpen << 8) | (gapOpen & 0x00ff);\r
- vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0);\r
- vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
- vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
-\r
- /* Load gap extension penalty to all elements of a constant */\r
- dup = (gapExtend << 8) | (gapExtend & 0x00ff);\r
- vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0);\r
- vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
- vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
-\r
- vMaxScore = _mm_xor_si128 (vMaxScore, vMaxScore);\r
-\r
- vZero = _mm_xor_si128 (vZero, vZero);\r
-\r
- /* Zero out the storage vector */\r
- for (i = 0; i < iter; i++)\r
- {\r
- _mm_store_si128 (pvE + i, vMaxScore);\r
- _mm_store_si128 (pvHStore + i, vMaxScore);\r
- }\r
-\r
- for (i = 0; i < dbLength; ++i)\r
- {\r
- /* fetch first data asap. */\r
- pvScore = pvQueryProf + dbSeq[i] * iter;\r
-\r
- /* zero out F. */\r
- vF = _mm_xor_si128 (vF, vF);\r
-\r
- /* load the next h value */\r
- vH = _mm_load_si128 (pvHStore + iter - 1);\r
- vH = _mm_slli_si128 (vH, 1);\r
-\r
- pv = pvHLoad;\r
- pvHLoad = pvHStore;\r
- pvHStore = pv;\r
-\r
- for (j = 0; j < iter; j++)\r
- {\r
- /* load values of vF and vH from previous row (one unit up) */\r
- vE = _mm_load_si128 (pvE + j);\r
-\r
- /* add score to vH */\r
- vH = _mm_adds_epu8 (vH, *(pvScore + j));\r
- vH = _mm_subs_epu8 (vH, vBias);\r
-\r
- /* Update highest score encountered this far */\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vH);\r
-\r
- /* get max from vH, vE and vF */\r
- vH = _mm_max_epu8 (vH, vE);\r
- vH = _mm_max_epu8 (vH, vF);\r
-\r
- /* save vH values */\r
- _mm_store_si128 (pvHStore + j, vH);\r
-\r
- /* update vE value */\r
- vH = _mm_subs_epu8 (vH, vGapOpen);\r
- vE = _mm_subs_epu8 (vE, vGapExtend);\r
- vE = _mm_max_epu8 (vE, vH);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epu8 (vF, vGapExtend);\r
- vF = _mm_max_epu8 (vF, vH);\r
-\r
- /* save vE values */\r
- _mm_store_si128 (pvE + j, vE);\r
-\r
- /* load the next h value */\r
- vH = _mm_load_si128 (pvHLoad + j);\r
- }\r
-\r
- /* reset pointers to the start of the saved data */\r
- j = 0;\r
- vH = _mm_load_si128 (pvHStore + j);\r
-\r
- /* the computed vF value is for the given column. since */\r
- /* we are at the end, we need to shift the vF value over */\r
- /* to the next column. */\r
- vF = _mm_slli_si128 (vF, 1);\r
- vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
- vTemp = _mm_subs_epu8 (vF, vTemp);\r
- vTemp = _mm_cmpeq_epi8 (vTemp, vZero);\r
- cmp = _mm_movemask_epi8 (vTemp);\r
-\r
- while (cmp != 0xffff) \r
- {\r
- vE = _mm_load_si128 (pvE + j);\r
-\r
- vH = _mm_max_epu8 (vH, vF);\r
-\r
- /* save vH values */\r
- _mm_store_si128 (pvHStore + j, vH);\r
-\r
- /* update vE incase the new vH value would change it */\r
- vH = _mm_subs_epu8 (vH, vGapOpen);\r
- vE = _mm_max_epu8 (vE, vH);\r
- _mm_store_si128 (pvE + j, vE);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epu8 (vF, vGapExtend);\r
-\r
- j++;\r
- if (j >= iter)\r
- {\r
- j = 0;\r
- vF = _mm_slli_si128 (vF, 1);\r
- }\r
-\r
- vH = _mm_load_si128 (pvHStore + j);\r
-\r
- vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
- vTemp = _mm_subs_epu8 (vF, vTemp);\r
- vTemp = _mm_cmpeq_epi8 (vTemp, vZero);\r
- cmp = _mm_movemask_epi8 (vTemp);\r
- }\r
- }\r
-\r
- /* find largest score in the vMaxScore vector */\r
- vTemp = _mm_srli_si128 (vMaxScore, 8);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 4);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 2);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 1);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
-\r
- /* store in temporary variable */\r
- score = _mm_extract_epi16 (vMaxScore, 0);\r
- score = score & 0x00ff;\r
-\r
- /* check if we might have overflowed */\r
- if (score + bias >= 255)\r
- {\r
- score = 255;\r
- }\r
-\r
- /* return largest score */\r
- return score;\r
-}\r
+++ /dev/null
-/******************************************************************
- Copyright 2006 by Michael Farrar. All rights reserved.
- This program may not be sold or incorporated into a commercial product,
- in whole or in part, without written consent of Michael Farrar. For
- further information regarding permission for use or reproduction, please
- contact: Michael Farrar at farrar.michael@gmail.com.
-*******************************************************************/
-
-/*
- Written by Michael Farrar, 2006.
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.
-*/
-
-#ifndef INCLUDE_SWSTRIPED_H
-#define INCLUDE_SWSTRIPED_H
-
-#include <emmintrin.h>
-
-#include "swsse2.h"
-#include "fastalib.h"
-
-SW_DATA *
-swStripedInit (unsigned char *querySeq,
- int queryLength,
- signed char *matrix);
-
-
-void
-swStripedScan (unsigned char *querySeq,
- int queryLength,
- FASTA_LIB *dbLib,
- void *swData,
- SEARCH_OPTIONS *options,
- SCORE_LIST *scores);
-
-
-void
-swStripedComplete (SW_DATA *pSwData);
-
-int
-swStripedWord (unsigned char *querySeq,
- int queryLength,
- unsigned char *dbSeq,
- int dbLength,
- unsigned short gapOpen,
- unsigned short gapExtend,
- __m128i *queryProf,
- __m128i *pvH1,
- __m128i *pvH2,
- __m128i *pvE);
-
-
-int
-swStripedByte (unsigned char *querySeq,
- int queryLength,
- unsigned char *dbSeq,
- int dbLength,
- unsigned short gapOpen,
- unsigned short gapExtend,
- __m128i *queryProf,
- __m128i *pvH1,
- __m128i *pvH2,
- __m128i *pvE,
- unsigned short bias);
-
-typedef struct {
- __m128i *pvbQueryProf;
- __m128i *pvsQueryProf;
- __m128i *pvH1;
- __m128i *pvH2;
- __m128i *pvE;
- unsigned char *pData;
- unsigned short bias;
-} SwStripedData;
-
-
-
-#endif /* INCLUDE_SWSTRIPED_H */
+++ /dev/null
-/******************************************************************\r
- Copyright 2006 by Michael Farrar. All rights reserved.\r
- This program may not be sold or incorporated into a commercial product,\r
- in whole or in part, without written consent of Michael Farrar. For \r
- further information regarding permission for use or reproduction, please \r
- contact: Michael Farrar at farrar.michael@gmail.com.\r
-*******************************************************************/\r
-\r
-/* Written by Michael Farrar, 2006.\r
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
-*/\r
-\r
-/* Implementation of the Wozniak "vertical" vectorization\r
- strategy for Smith-Waterman comparison, Wozniak (1997) Comp.\r
- Appl. Biosci. 13:145-150\r
-*/\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <emmintrin.h>\r
-\r
-#include "swsse2.h"\r
-#include "swwozniak.h"\r
-\r
-#define MATRIX_ROW_SIZE 32\r
-#define MATRIX_SIZE (MATRIX_ROW_SIZE * (ALPHA_SIZE + 1))\r
-\r
-typedef struct {\r
- char *pbMatrix;\r
- short *psMatrix;\r
- __m128i *pvHStore;\r
- __m128i *pvEStore;\r
- unsigned char *pData;\r
- unsigned short bias;\r
-} SwWozniakData;\r
-\r
-int\r
-swWozniakWord (unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- short *pMatrix,\r
- __m128i *pvHStore,\r
- __m128i *pvEStore);\r
-\r
-int\r
-swWozniakByte (unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- char *pMatrix,\r
- __m128i *pvHStore,\r
- __m128i *pvEStore,\r
- unsigned short bias);\r
-\r
-void *\r
-swWozniakInit(unsigned char *querySeq,\r
- int queryLength,\r
- signed char *matrix)\r
-{\r
- int i, j;\r
-\r
- int nCount;\r
-\r
- int lenQryByte;\r
- int lenQryShort;\r
-\r
- int bias;\r
-\r
- int weight;\r
-\r
- short *ps;\r
- char *pc;\r
-\r
- signed char *matrixRow;\r
-\r
- size_t aligned;\r
-\r
- SwWozniakData *pSwData;\r
- \r
- lenQryByte = (queryLength + 15) / 16 + 2;\r
- lenQryShort = (queryLength + 7) / 8 + 2;\r
-\r
- pSwData = (SwWozniakData *) malloc (sizeof (SwWozniakData));\r
- if (!pSwData) {\r
- fprintf (stderr, "Unable to allocate memory for SW data\n");\r
- exit (-1);\r
- }\r
-\r
- nCount = 64 + /* slack bytes */\r
- 4 * (ALPHA_SIZE + 1) + /* byte matrix */\r
- 4 * (ALPHA_SIZE + 1) + /* short matrix */\r
- ((queryLength + 16) * 2); /* vH and vE */\r
-\r
-\r
- pSwData->pData = (unsigned char *) calloc (nCount, sizeof (__m128i));\r
- if (!pSwData->pData) {\r
- fprintf (stderr, "Unable to allocate memory for SW data buffers\n");\r
- exit (-1);\r
- }\r
-\r
- pSwData->pbMatrix = (char *) pSwData->pData;\r
- pSwData->psMatrix = (short *) (pSwData->pbMatrix + MATRIX_SIZE);\r
-\r
- /* since we might port this to another platform, lets align the data */\r
- /* to 16 byte boundries ourselves */\r
- aligned = (size_t) (pSwData->psMatrix + MATRIX_SIZE);\r
- aligned = (aligned + 15) & ~(0x0f);\r
-\r
- pSwData->pvHStore = (__m128i *) aligned;\r
- pSwData->pvEStore = pSwData->pvHStore + queryLength + 16;\r
-\r
- /* Find the bias to use in the substitution matrix */\r
- bias = 127;\r
- for (i = 0; i < ALPHA_SIZE * ALPHA_SIZE; i++) {\r
- if (matrix[i] < bias) {\r
- bias = matrix[i];\r
- }\r
- }\r
- if (bias > 0) {\r
- bias = 0;\r
- }\r
-\r
- pc = pSwData->pbMatrix;\r
- ps = pSwData->psMatrix;\r
-\r
- for (i = 0; i < ALPHA_SIZE; i++) {\r
- matrixRow = matrix + i * ALPHA_SIZE;\r
-\r
- for (j = 0; j < ALPHA_SIZE; j++) {\r
- weight = matrixRow[j];\r
- *pc++ = weight - bias;\r
- *ps++ = weight;\r
- }\r
-\r
- for ( ; j < MATRIX_ROW_SIZE; j++) {\r
- *pc++ = -bias;\r
- *ps++ = 0;\r
- }\r
- }\r
-\r
- /* add the weights for the NULL rows */\r
- for (j = 0; j < MATRIX_ROW_SIZE; j++) {\r
- *pc++ = -bias;\r
- *ps++ = 0;\r
- }\r
-\r
- pSwData->bias = (unsigned short) -bias;\r
-\r
- return pSwData;\r
-}\r
-\r
-void swWozniakScan (unsigned char *querySeq,\r
- int queryLength,\r
- FASTA_LIB *dbLib,\r
- void *swData,\r
- SEARCH_OPTIONS *options,\r
- SCORE_LIST *scores)\r
-{\r
- int score;\r
-\r
- int threshold = options->threshold;\r
-\r
- unsigned char *dbSeq;\r
- int dbLen;\r
-\r
- int gapInit = -(options->gapInit + options->gapExt);\r
- int gapExt = -options->gapExt;\r
-\r
- SwWozniakData *wozniakData = (SwWozniakData *) swData;\r
-\r
- dbSeq = nextSeq (dbLib, &dbLen);\r
- while (dbLen > 0) {\r
-\r
- score = swWozniakByte (querySeq, queryLength, \r
- dbSeq, dbLen, \r
- gapInit, gapExt, \r
- wozniakData->pbMatrix,\r
- wozniakData->pvHStore,\r
- wozniakData->pvEStore,\r
- wozniakData->bias);\r
-\r
- /* check if needs a run with higher precision */\r
- if (score >= 255) {\r
- score = swWozniakWord (querySeq, queryLength, \r
- dbSeq, dbLen, \r
- gapInit, gapExt, \r
- wozniakData->psMatrix,\r
- wozniakData->pvHStore,\r
- wozniakData->pvEStore);\r
- }\r
-\r
- if (score >= threshold) {\r
- int minScore = insertList (scores, score, seqName (dbLib));\r
- if (minScore >= threshold) {\r
- threshold = minScore;\r
- }\r
- }\r
-\r
- dbSeq = nextSeq (dbLib, &dbLen);\r
- }\r
-}\r
-\r
-void\r
-swWozniakComplete(void *pSwData)\r
-{\r
- SwWozniakData *pWozniakData = (SwWozniakData *) pSwData;\r
-\r
- free (pWozniakData->pData);\r
- free (pWozniakData);\r
-}\r
-\r
-\r
-int\r
-swWozniakWord (unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- short *pMatrix,\r
- __m128i *pvHStore,\r
- __m128i *pvEStore)\r
-{\r
- int i, j, k, l, m;\r
- int score;\r
-\r
- short *pScore;\r
-\r
- __m128i vE, vF, vH;\r
- __m128i vEUp, vHUp1, vHUp2;\r
-\r
- __m128i vMaxScore;\r
- __m128i vGapOpen;\r
- __m128i vGapExtend;\r
- __m128i vScore;\r
-\r
- __m128i vMin;\r
- __m128i vMinimums;\r
- __m128i vTemp;\r
-\r
- /* remove unreferenced warning */\r
- querySeq;\r
-\r
- /* Load gap opening penalty to all elements of a constant */\r
- vGapOpen = _mm_insert_epi16 (vGapOpen, gapOpen, 0);\r
- vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
- vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
-\r
- /* Load gap extension penalty to all elements of a constant */\r
- vGapExtend = _mm_insert_epi16 (vGapExtend, gapExtend, 0);\r
- vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
- vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
-\r
- /* load vMaxScore with the zeros. since we are using signed */\r
- /* math, we will bias the maxscore to -32768 so we have the */\r
- /* full range of the short. */\r
- vMaxScore = _mm_cmpeq_epi16 (vMaxScore, vMaxScore);\r
- vMaxScore = _mm_slli_epi16 (vMaxScore, 15);\r
-\r
- vMinimums = _mm_shuffle_epi32 (vMaxScore, 0);\r
-\r
- vMin = _mm_shuffle_epi32 (vMaxScore, 0);\r
- vMin = _mm_srli_si128 (vMin, 14);\r
-\r
- for (i = 0; i < queryLength + 8; i++)\r
- {\r
- _mm_store_si128 (pvEStore + i, vMaxScore);\r
- _mm_store_si128 (pvHStore + i, vMaxScore);\r
- }\r
-\r
- pScore = (short *) &vScore;\r
-\r
- for (i = 0; i < dbLength; i += 8, dbSeq += 8)\r
- {\r
- /* zero lots of stuff. */\r
- vE = _mm_shuffle_epi32 (vMinimums, 0);\r
- vF = _mm_shuffle_epi32 (vMinimums, 0);\r
- vH = _mm_shuffle_epi32 (vMinimums, 0);\r
- vHUp2 = _mm_shuffle_epi32 (vMinimums, 0);\r
-\r
- vScore = _mm_xor_si128 (vScore, vScore);\r
-\r
- for (j = 0; j < 8; ++j)\r
- {\r
- for (k = 0; k <= j; ++k) {\r
- int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
- pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
- }\r
- for ( ; k < 8; ++k) {\r
- pScore[k] = 0;\r
- }\r
-\r
- /* load values of vE and vH from previous row (one unit up) */\r
- vEUp = _mm_load_si128 (pvEStore + j);\r
- vHUp1 = _mm_load_si128 (pvHStore + j);\r
-\r
- /* shift into place so we have complete vE and vH vectors */\r
- /* that refer to the values one unit up from each cell */\r
- /* that we are currently working on. */\r
- vTemp = _mm_slli_si128 (vE, 2);\r
- vEUp = _mm_srli_si128 (vEUp, 14);\r
- vEUp = _mm_or_si128 (vEUp, vTemp);\r
-\r
- vTemp = _mm_slli_si128 (vH, 2);\r
- vHUp1 = _mm_srli_si128 (vHUp1, 14);\r
- vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
-\r
- /* do the dynamic programming */\r
-\r
- /* update vE value */\r
- vE = _mm_subs_epi16 (vEUp, vGapExtend);\r
- vTemp = _mm_subs_epi16 (vHUp1, vGapOpen);\r
- vE = _mm_max_epi16 (vE, vTemp);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epi16 (vF, vGapExtend);\r
- vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
- vF = _mm_max_epi16 (vF, vTemp);\r
-\r
- /* add score to vH */\r
- vH = _mm_adds_epi16 (vHUp2, vScore);\r
-\r
- /* set vH to max of vH, vE, vF */\r
- vH = _mm_max_epi16 (vH, vE);\r
- vH = _mm_max_epi16 (vH, vF);\r
-\r
- /* Save value to use for next diagonal vH */\r
- vHUp2 = vHUp1;\r
-\r
- /* Update highest score encountered this far */\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vH);\r
- }\r
-\r
- for (l = 0; j < queryLength; ++j, ++l)\r
- {\r
- for (k = 0; k < 8; ++k) {\r
- int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
- pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
- }\r
-\r
- /* load values of vE and vH from previous row (one unit up) */\r
- vEUp = _mm_load_si128 (pvEStore + j);\r
- vHUp1 = _mm_load_si128 (pvHStore + j);\r
-\r
- /* save old values of vE and vH to use on next row */\r
- _mm_store_si128 (pvEStore + l, vE);\r
- _mm_store_si128 (pvHStore + l, vH);\r
- \r
- /* shift into place so we have complete vE and vH vectors */\r
- /* that refer to the values one unit up from each cell */\r
- /* that we are currently working on. */\r
- vTemp = _mm_slli_si128 (vE, 2);\r
- vEUp = _mm_srli_si128 (vEUp, 14);\r
- vEUp = _mm_or_si128 (vEUp, vTemp);\r
-\r
- vTemp = _mm_slli_si128 (vH, 2);\r
- vHUp1 = _mm_srli_si128 (vHUp1, 14);\r
- vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
-\r
- /* do the dynamic programming */\r
-\r
- /* update vE value */\r
- vE = _mm_subs_epi16 (vEUp, vGapExtend);\r
- vTemp = _mm_subs_epi16 (vHUp1, vGapOpen);\r
- vE = _mm_max_epi16 (vE, vTemp);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epi16 (vF, vGapExtend);\r
- vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
- vF = _mm_max_epi16 (vF, vTemp);\r
-\r
- /* add score to vH */\r
- vH = _mm_adds_epi16(vHUp2, vScore);\r
-\r
- /* set vH to max of vH, vE, vF */\r
- vH = _mm_max_epi16 (vH, vE);\r
- vH = _mm_max_epi16 (vH, vF);\r
-\r
- /* Save value to use for next diagonal vH */\r
- vHUp2 = vHUp1;\r
-\r
- /* Update highest score encountered this far */\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vH);\r
- }\r
-\r
- for (m = 0 ; m < 7; ++j, ++l, ++m)\r
- {\r
- for (k = 0; k <= m; ++k) {\r
- pScore[k] = 0;\r
- }\r
- for ( ; k < 8; ++k) {\r
- int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
- pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
- }\r
-\r
- /* save old values of vE and vH to use on next row */\r
- _mm_store_si128 (pvEStore + l, vE);\r
- _mm_store_si128 (pvHStore + l, vH);\r
-\r
- /* v_score_load contains all zeros */\r
- vTemp = _mm_slli_si128 (vE, 2);\r
- vEUp = _mm_or_si128 (vMin, vTemp);\r
- vTemp = _mm_slli_si128 (vH, 2);\r
- vHUp1 = _mm_or_si128 (vMin, vTemp);\r
-\r
- /* do the dynamic programming */\r
-\r
- /* update vE value */\r
- vE = _mm_subs_epi16 (vEUp, vGapExtend);\r
- vTemp = _mm_subs_epi16 (vHUp1, vGapOpen);\r
- vE = _mm_max_epi16 (vE, vTemp);\r
-\r
- /* update vF value */\r
- vF = _mm_subs_epi16 (vF, vGapExtend);\r
- vTemp = _mm_subs_epi16 (vH, vGapOpen);\r
- vF = _mm_max_epi16 (vF, vTemp);\r
-\r
- /* add score to vH */\r
- vH = _mm_adds_epi16 (vHUp2, vScore);\r
-\r
- /* set vH to max of vH, vE, vF */\r
- vH = _mm_max_epi16 (vH, vE);\r
- vH = _mm_max_epi16 (vH, vF);\r
-\r
- /* Save value to use for next diagonal vH */\r
- vHUp2 = vHUp1;\r
-\r
- /* Update highest score encountered this far */\r
- vMaxScore = _mm_max_epi16(vMaxScore,vH);\r
- }\r
-\r
- _mm_store_si128 (pvEStore + l, vE);\r
- _mm_store_si128 (pvHStore + l, vH);\r
- }\r
-\r
- /* find largest score in the vMaxScore vector */\r
- vTemp = _mm_srli_si128 (vMaxScore, 8);\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 4);\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 2);\r
- vMaxScore = _mm_max_epi16 (vMaxScore, vTemp);\r
-\r
- /* store in temporary variable */\r
- score = (short) _mm_extract_epi16 (vMaxScore, 0);\r
-\r
- /* return largest score */\r
- return score + SHORT_BIAS;\r
-}\r
-\r
-\r
-\r
-\r
-int\r
-swWozniakByte (unsigned char *querySeq,\r
- int queryLength,\r
- unsigned char *dbSeq,\r
- int dbLength,\r
- unsigned short gapOpen,\r
- unsigned short gapExtend,\r
- char *pMatrix,\r
- __m128i *pvHStore,\r
- __m128i *pvEStore,\r
- unsigned short bias)\r
-{\r
- int i, j, k, l, m;\r
- int score;\r
-\r
- int dup;\r
-\r
- char *pScore;\r
-\r
- __m128i vE, vF, vH;\r
- __m128i vEUp, vHUp1, vHUp2;\r
-\r
- __m128i vMaxScore;\r
- __m128i vBias;\r
- __m128i vGapOpen;\r
- __m128i vGapExtend;\r
- __m128i vScore;\r
-\r
- __m128i vTemp;\r
-\r
- /* remove unreferenced warning */\r
- querySeq;\r
-\r
- /* Load the bias to all elements of a constant */\r
- dup = (bias << 8) | (bias & 0x00ff);\r
- vBias = _mm_insert_epi16 (vBias, dup, 0);\r
- vBias = _mm_shufflelo_epi16 (vBias, 0);\r
- vBias = _mm_shuffle_epi32 (vBias, 0);\r
-\r
- /* Load gap opening penalty to all elements of a constant */\r
- dup = (gapOpen << 8) | (gapOpen & 0x00ff);\r
- vGapOpen = _mm_insert_epi16 (vGapOpen, dup, 0);\r
- vGapOpen = _mm_shufflelo_epi16 (vGapOpen, 0);\r
- vGapOpen = _mm_shuffle_epi32 (vGapOpen, 0);\r
-\r
- /* Load gap extension penalty to all elements of a constant */\r
- dup = (gapExtend << 8) | (gapExtend & 0x00ff);\r
- vGapExtend = _mm_insert_epi16 (vGapExtend, dup, 0);\r
- vGapExtend = _mm_shufflelo_epi16 (vGapExtend, 0);\r
- vGapExtend = _mm_shuffle_epi32 (vGapExtend, 0);\r
-\r
- vScore = _mm_xor_si128 (vScore, vScore);\r
- vMaxScore = _mm_xor_si128 (vMaxScore, vMaxScore);\r
-\r
- for (i = 0; i < queryLength + 16; i++)\r
- {\r
- _mm_store_si128 (pvEStore + i, vMaxScore);\r
- _mm_store_si128 (pvHStore + i, vMaxScore);\r
- }\r
-\r
- pScore = (char *) &vScore;\r
-\r
- for (i = 0; i < dbLength; i += 16, dbSeq += 16)\r
- {\r
- // zero lots of stuff.\r
- vE = _mm_xor_si128 (vE, vE);\r
- vF = _mm_xor_si128 (vF, vF);\r
- vH = _mm_xor_si128 (vH, vH);\r
- vHUp2 = _mm_xor_si128 (vHUp2, vHUp2);\r
-\r
- vScore = _mm_xor_si128 (vScore, vScore);\r
-\r
- for (j = 0; j < 16; ++j)\r
- {\r
- for (k = 0; k <= j; ++k) {\r
- int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
- pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
- }\r
- for ( ; k < 16; ++k) {\r
- pScore[k] = (char) bias;\r
- }\r
-\r
- // load values of vE and vH from previous row (one unit up)\r
- vEUp = _mm_load_si128 (pvEStore + j);\r
- vHUp1 = _mm_load_si128 (pvHStore + j);\r
-\r
- // shift into place so we have complete vE and vH vectors\r
- // that refer to the values one unit up from each cell\r
- // that we are currently working on.\r
- vTemp = _mm_slli_si128 (vE, 1);\r
- vEUp = _mm_srli_si128 (vEUp, 15);\r
- vEUp = _mm_or_si128 (vEUp, vTemp);\r
-\r
- vTemp = _mm_slli_si128 (vH, 1);\r
- vHUp1 = _mm_srli_si128 (vHUp1, 15);\r
- vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
-\r
- // do the dynamic programming\r
-\r
- // update vE value\r
- vE = _mm_subs_epu8 (vEUp, vGapExtend);\r
- vTemp = _mm_subs_epu8 (vHUp1, vGapOpen);\r
- vE = _mm_max_epu8 (vE, vTemp);\r
-\r
- // update vF value\r
- vF = _mm_subs_epu8 (vF, vGapExtend);\r
- vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
- vF = _mm_max_epu8 (vF, vTemp);\r
-\r
- // add score to vH\r
- vH = _mm_adds_epu8 (vHUp2, *((__m128i *) pScore));\r
- vH = _mm_subs_epu8 (vH, vBias);\r
-\r
- // set vH to max of vH, vE, vF\r
- vH = _mm_max_epu8 (vH, vE);\r
- vH = _mm_max_epu8 (vH, vF);\r
-\r
- // Save value to use for next diagonal vH\r
- vHUp2 = vHUp1;\r
-\r
- // Update highest score encountered this far\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vH);\r
- }\r
-\r
- for (l = 0; j < queryLength; ++j, ++l)\r
- {\r
- for (k = 0; k < 16; ++k) {\r
- int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
- pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
- }\r
-\r
- // load values of vE and vH from previous row (one unit up)\r
- vEUp = _mm_load_si128 (pvEStore + j);\r
- vHUp1 = _mm_load_si128 (pvHStore + j);\r
-\r
- // save old values of vE and vH to use on next row\r
- _mm_store_si128 (pvEStore + l, vE);\r
- _mm_store_si128 (pvHStore + l, vH);\r
- \r
- // shift into place so we have complete vE and vH vectors\r
- // that refer to the values one unit up from each cell\r
- // that we are currently working on.\r
- vTemp = _mm_slli_si128 (vE, 1);\r
- vEUp = _mm_srli_si128 (vEUp, 15);\r
- vEUp = _mm_or_si128 (vEUp, vTemp);\r
-\r
- vTemp = _mm_slli_si128 (vH, 1);\r
- vHUp1 = _mm_srli_si128 (vHUp1, 15);\r
- vHUp1 = _mm_or_si128 (vHUp1, vTemp);\r
-\r
- // do the dynamic programming\r
-\r
- // update vE value\r
- vE = _mm_subs_epu8 (vEUp, vGapExtend);\r
- vTemp = _mm_subs_epu8 (vHUp1, vGapOpen);\r
- vE = _mm_max_epu8 (vE, vTemp);\r
-\r
- // update vF value\r
- vF = _mm_subs_epu8 (vF, vGapExtend);\r
- vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
- vF = _mm_max_epu8 (vF, vTemp);\r
-\r
- // add score to vH\r
- vH = _mm_adds_epu8(vHUp2, vScore);\r
- vH = _mm_subs_epu8 (vH, vBias);\r
-\r
- // set vH to max of vH, vE, vF\r
- vH = _mm_max_epu8 (vH, vE);\r
- vH = _mm_max_epu8 (vH, vF);\r
-\r
- // Save value to use for next diagonal vH\r
- vHUp2 = vHUp1;\r
-\r
- // Update highest score encountered this far\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vH);\r
- }\r
-\r
- for (m = 0 ; m < 15; ++j, ++l, ++m)\r
- {\r
- for (k = 0; k <= m; ++k) {\r
- pScore[k] = (char) bias;\r
- }\r
- for ( ; k < 16; ++k) {\r
- int matrixOffset = *(dbSeq + k) * MATRIX_ROW_SIZE;\r
- pScore[k] = *(pMatrix + matrixOffset + *(querySeq + j - k));\r
- }\r
-\r
- // save old values of vE and vH to use on next row\r
- _mm_store_si128 (pvEStore + l, vE);\r
- _mm_store_si128 (pvHStore + l, vH);\r
-\r
- // v_score_load contains all zeros\r
- vEUp = _mm_slli_si128 (vE, 1);\r
- vHUp1 = _mm_slli_si128 (vH, 1);\r
-\r
- // do the dynamic programming\r
-\r
- // update vE value\r
- vE = _mm_subs_epu8 (vEUp, vGapExtend);\r
- vTemp = _mm_subs_epu8 (vHUp1, vGapOpen);\r
- vE = _mm_max_epu8 (vE, vTemp);\r
-\r
- // update vF value\r
- vF = _mm_subs_epu8 (vF, vGapExtend);\r
- vTemp = _mm_subs_epu8 (vH, vGapOpen);\r
- vF = _mm_max_epu8 (vF, vTemp);\r
-\r
- // add score to vH\r
- vH = _mm_adds_epu8 (vHUp2, vScore);\r
- vH = _mm_subs_epu8 (vH, vBias);\r
-\r
- // set vH to max of vH, vE, vF\r
- vH = _mm_max_epu8 (vH, vE);\r
- vH = _mm_max_epu8 (vH, vF);\r
-\r
- // Save value to use for next diagonal vH\r
- vHUp2 = vHUp1;\r
-\r
- // Update highest score encountered this far\r
- vMaxScore = _mm_max_epu8(vMaxScore,vH);\r
- }\r
-\r
- _mm_store_si128 (pvEStore + l, vE);\r
- _mm_store_si128 (pvHStore + l, vH);\r
- }\r
-\r
- // find largest score in the vMaxScore vector\r
- vTemp = _mm_srli_si128 (vMaxScore, 8);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 4);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 2);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
- vTemp = _mm_srli_si128 (vMaxScore, 1);\r
- vMaxScore = _mm_max_epu8 (vMaxScore, vTemp);\r
-\r
- // store in temporary variable\r
- score = (short) _mm_extract_epi16 (vMaxScore, 0);\r
- score = score & 0x00ff;\r
-\r
- // check if we might have overflowed\r
- if (score + bias >= 255)\r
- {\r
- score = 255;\r
- }\r
-\r
-\r
- // return largest score\r
- return score;\r
-}\r
+++ /dev/null
-/******************************************************************\r
- Copyright 2006 by Michael Farrar. All rights reserved.\r
- This program may not be sold or incorporated into a commercial product,\r
- in whole or in part, without written consent of Michael Farrar. For \r
- further information regarding permission for use or reproduction, please \r
- contact: Michael Farrar at farrar.michael@gmail.com.\r
-*******************************************************************/\r
-\r
-/*\r
- Written by Michael Farrar, 2006.\r
- Please send bug reports and/or suggestions to farrar.michael@gmail.com.\r
-*/\r
-\r
-#ifndef INCLUDE_SWWOZNIAK_H\r
-#define INCLUDE_SWWOZNIAK_H\r
-\r
-#include "swsse2.h"\r
-#include "fastalib.h"\r
-\r
-#define MATRIX_ROW_SIZE 32\r
-\r
-SW_DATA *\r
-swWozniakInit (unsigned char *querySeq,\r
- int queryLength,\r
- signed char *matrix);\r
-\r
-\r
-void \r
-swWozniakScan (unsigned char *querySeq,\r
- int queryLength,\r
- FASTA_LIB *dbLib,\r
- void *swData,\r
- SEARCH_OPTIONS *options,\r
- SCORE_LIST *scores);\r
-\r
-void\r
-swWozniakComplete (SW_DATA *pSwData);\r
-\r
-\r
-#endif /* INCLUDE_SWWOZNIAK_H */\r