1 /******************************************************************
\r
2 Copyright 2006 by Michael Farrar. All rights reserved.
\r
3 This program may not be sold or incorporated into a commercial product,
\r
4 in whole or in part, without written consent of Michael Farrar. For
\r
5 further information regarding permission for use or reproduction, please
\r
6 contact: Michael Farrar at farrar.michael@gmail.com.
\r
7 *******************************************************************/
\r
10 Written by Michael Farrar, 2006.
\r
11 Please send bug reports and/or suggestions to farrar.michael@gmail.com.
\r
19 #include <sys/timeb.h>
\r
23 #include "fastalib.h"
\r
25 #include "swscalar.h"
\r
26 #include "swwozniak.h"
\r
28 #include "swrognes.h"
\r
30 #include "swstriped.h"
\r
32 typedef enum { SCALAR,
\r
40 const char *SW_IMPLEMENATION[] = {
\r
50 SW_DATA *(*init) (unsigned char *querySeq,
\r
52 signed char *matrix);
\r
53 void (*scan) (unsigned char *querySeq,
\r
57 SEARCH_OPTIONS *options,
\r
58 SCORE_LIST *scores);
\r
59 void (*done) (SW_DATA *pSwData);
\r
62 SW_FUNCT_DEFS swFuncs[] = {
\r
87 const char AMINO_ACIDS[ALPHA_SIZE] = {
\r
88 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
\r
89 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R',
\r
90 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'
\r
93 const int AMINO_ACID_VALUE[256] = {
\r
94 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
95 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
96 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
97 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
98 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1,
\r
99 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,
\r
100 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, -1,
\r
101 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, -1, -1, -1, -1, -1,
\r
102 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
103 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
104 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
105 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
106 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
108 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
\r
109 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
\r
112 SCORE_LIST *initList (int count);
\r
113 void freeList (SCORE_LIST *list);
\r
115 void printResults (SCORE_LIST *list);
\r
117 void printUsage (void)
\r
119 printf ("Usage: swsse2 [-h] [-(n|w|r|s)] [-i num] [-e num] [-t num] "
\r
120 "[-c num] matrix query db\n");
\r
121 printf (" -h : this help message\n");
\r
122 printf (" -n : run a non-vectorized Smith-Waterman search\n");
\r
123 printf (" -w : run a vectorized Wozniak search\n");
\r
125 printf (" -r : run a vectorized Rognes search\n");
\r
127 printf (" -r : run a vectorized Rognes search (NOT SUPPORTED)\n");
\r
129 printf (" -s : run a vectorized striped search (default)\n");
\r
130 printf (" -i num : gap init penalty (default -10)\n");
\r
131 printf (" -e num : gap extension penalty (default -2)\n");
\r
132 printf (" -t num : minimum score threshold (default 20)\n");
\r
133 printf (" -c num : number of scores to be displayed (default 250)\n");
\r
134 printf (" matrix : scoring matrix file\n");
\r
135 printf (" query : query sequence file (fasta format)\n");
\r
136 printf (" db : sequence database file (fasta format)\n");
\r
140 int main (int argc, char **argv)
\r
146 int rptCount = 250;
\r
148 SW_TYPES swType = STRIPED;
\r
150 char *dbFile = NULL;
\r
151 char *queryFile = NULL;
\r
152 char *matrixFile = NULL;
\r
154 signed char *matrix;
\r
156 unsigned char *querySeq;
\r
161 FASTA_LIB *queryLib;
\r
166 struct _timeb startTime;
\r
167 struct _timeb endTime;
\r
169 double startTimeSec;
\r
172 SEARCH_OPTIONS options;
\r
179 /* set the default options */
\r
180 options.gapInit = -10;
\r
181 options.gapExt = -2;
\r
182 options.threshold = 20;
\r
186 if (i + 3 == argc) {
\r
187 /* should be matrix file name */
\r
188 matrixFile = argv[i];
\r
190 } else if (i + 2 == argc) {
\r
191 /* should be query file name */
\r
192 queryFile = argv[i];
\r
194 } else if (i + 1 == argc) {
\r
195 /* should be matrix file name */
\r
199 /* process arguements */
\r
200 switch (argv[i][1]) {
\r
211 fprintf (stderr, "The ROGNES implementation is not supported\n");
\r
222 penalty = atoi (argv[++i]);
\r
223 if (penalty > 0 || penalty < -128) {
\r
224 fprintf (stderr, "Invalid gap init %d\n", penalty);
\r
225 fprintf (stderr, "Valid range is 0 - -128\n");
\r
228 options.gapInit = (unsigned short) penalty;
\r
231 penalty = atoi (argv[++i]);
\r
232 if (penalty > 0 || penalty < -128) {
\r
233 fprintf (stderr, "Invalid gap extension %d\n", penalty);
\r
234 fprintf (stderr, "Valid range is 0 - -128\n");
\r
237 options.gapExt = (unsigned short) penalty;
\r
240 options.threshold = atoi (argv[++i]);
\r
243 rptCount = atoi (argv[++i]);
\r
244 if (rptCount < 10) {
\r
249 fprintf (stderr, "Invalid option %s\n", argv[i]);
\r
257 list = initList (rptCount);
\r
259 matrix = readMatrix (matrixFile);
\r
260 if (matrix == NULL) {
\r
261 fprintf (stderr, "Error reading matrix\n");
\r
265 dbLib = openLib (dbFile, swType == WOZNIAK);
\r
266 queryLib = openLib (queryFile, 0);
\r
268 querySeq = nextSeq (queryLib, &queryLen);
\r
269 if (queryLen == 0) {
\r
270 fprintf (stderr, "Empty query sequence\n");
\r
274 printf ("%s vs %s\n", queryFile, dbFile);
\r
275 printf ("Matrix: %s, Init: %d, Ext: %d\n\n",
\r
276 matrixFile, options.gapInit, options.gapExt);
\r
278 _ftime(&startTime);
\r
280 swData = (swFuncs[swType].init) (querySeq, queryLen, matrix);
\r
282 (swFuncs[swType].scan) (querySeq, queryLen, dbLib, swData, &options, list);
\r
284 (swFuncs[swType].done) (swData);
\r
288 printResults (list);
\r
291 printf ("%d residues in query string\n", queryLen);
\r
292 printf ("%d residues in %d library sequences\n",
\r
293 dbLib->residues, dbLib->sequences);
\r
295 startTimeSec = startTime.time + startTime.millitm / 1000.0;
\r
296 endTimeSec = endTime.time + endTime.millitm / 1000.0;
\r
297 printf ("Scan time: %6.3f (%s implementation)\n",
\r
298 endTimeSec - startTimeSec,
\r
299 SW_IMPLEMENATION[swType]);
\r
301 closeLib (queryLib);
\r
311 initList (int count)
\r
319 hdr = (SCORE_LIST *) malloc (sizeof (SCORE_LIST));
\r
321 fprintf (stderr, "Cannot allocate storage for score header\n");
\r
325 list = (SCORE_NODE *) calloc (count, sizeof (SCORE_NODE));
\r
326 if (list == NULL) {
\r
327 fprintf (stderr, "Cannot allocate storage for scores\n");
\r
331 /* initialize the scores list */
\r
336 hdr->buffer = list;
\r
339 for (i = 0; i < count; ++i) {
\r
340 list[i].name[0] = '\0';
\r
344 list[i].prev = NULL;
\r
346 list[i].prev = &list[i-1];
\r
349 if (i == count - 1) {
\r
350 list[i].next = NULL;
\r
352 list[i].next = &list[i+1];
\r
359 void freeList (SCORE_LIST *list)
\r
361 free (list->buffer);
\r
365 int insertList (SCORE_LIST *list, int score, char *name)
\r
368 SCORE_NODE *ptr = list->first;
\r
370 if (list->free != NULL) {
\r
372 list->free = list->free->next;
\r
373 } else if (score > list->last->score) {
\r
375 list->last = node->prev;
\r
376 list->last->next = NULL;
\r
378 /* should never happen */
\r
379 return list->minScore + 1;
\r
382 strncpy (node->name, name, MAX_SCORE_NAME);
\r
383 node->name[MAX_SCORE_NAME - 1] = '\0';
\r
384 node->score = score;
\r
386 while (ptr && ptr->score >= score) {
\r
390 if (list->first == NULL) {
\r
391 list->first = node;
\r
395 } else if (ptr == NULL) {
\r
396 node->prev = list->last;
\r
398 node->prev->next = node;
\r
401 node->prev = ptr->prev;
\r
404 if (node->prev == NULL) {
\r
405 list->first = node;
\r
407 node->prev->next = node;
\r
412 if (list->free == NULL) {
\r
413 list->minScore = list->last->score + 1;
\r
416 return list->minScore;
\r
419 void printResults (SCORE_LIST *list)
\r
421 SCORE_NODE *ptr = list->first;
\r
423 printf ("Score Description\n");
\r
426 printf ("%5d %s\n", ptr->score, ptr->name);
\r