]> git.donarmstrong.com Git - samtools.git/commitdiff
prepare for the indel caller. It is not ready yet.
authorHeng Li <lh3@live.co.uk>
Mon, 8 Nov 2010 03:23:16 +0000 (03:23 +0000)
committerHeng Li <lh3@live.co.uk>
Mon, 8 Nov 2010 03:23:16 +0000 (03:23 +0000)
Makefile
bam.h
bam2bcf.c
bam2bcf.h
bam2bcf_indel.c [new file with mode: 0644]
bam_plcmd.c
kaln.c
kaln.h

index 7ab972e21982a37b132edd1114805fc2ecd898f3..eeae213875b6b084592d7f0858452915f2a53695 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ LOBJS=          bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
                        $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o
 AOBJS=         bam_tview.o bam_maqcns.o bam_plcmd.o sam_view.o \
                        bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o     \
-                       bamtk.o kaln.o kprobaln.o bam2bcf.o errmod.o sample.o
+                       bamtk.o kaln.o kprobaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o
 PROG=          samtools
 INCLUDES=      -I.
 SUBDIRS=       . bcftools misc
diff --git a/bam.h b/bam.h
index 4594ac477e33be12fc023ac0e18c2cfe3dc7962e..eef2ea9853f25966c5a40c45a86272b78a507fb2 100644 (file)
--- a/bam.h
+++ b/bam.h
@@ -495,7 +495,7 @@ extern "C" {
                bam1_t *b;
                int32_t qpos;
                int indel, level;
-               uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1;
+               uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
        } bam_pileup1_t;
 
        typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
index 509761d3395df7d01e56f7a368f2f2daa29be4b3..fa9d7a47b762235ff6ea0a212c01b0c406bee301 100644 (file)
--- a/bam2bcf.c
+++ b/bam2bcf.c
@@ -14,18 +14,13 @@ extern      void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
 #define CAP_DIST 25
 
-struct __bcf_callaux_t {
-       int max_bases, capQ, min_baseQ;
-       uint16_t *bases;
-       errmod_t *e;
-};
-
 bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
 {
        bcf_callaux_t *bca;
        if (theta <= 0.) theta = CALL_DEFTHETA;
        bca = calloc(1, sizeof(bcf_callaux_t));
        bca->capQ = 60;
+       bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 70;
        bca->min_baseQ = min_baseQ;
        bca->e = errmod_init(1. - theta);
        return bca;
index 4de743a9287f7421e6f612a2426897d3dbe69212..94207d092e151f48b64d2380e1e71c1493f2e155 100644 (file)
--- a/bam2bcf.h
+++ b/bam2bcf.h
@@ -2,10 +2,20 @@
 #define BAM2BCF_H
 
 #include <stdint.h>
+#include "errmod.h"
 #include "bcftools/bcf.h"
 
-struct __bcf_callaux_t;
-typedef struct __bcf_callaux_t bcf_callaux_t;
+#define B2B_INDEL_NULL 10000
+
+typedef struct __bcf_callaux_t {
+       int capQ, min_baseQ;
+       int openQ, extQ, tandemQ;
+       // for internal uses
+       int max_bases;
+       int indel_types[4];
+       uint16_t *bases;
+       errmod_t *e;
+} bcf_callaux_t;
 
 typedef struct {
        int depth, qsum[4];
@@ -29,6 +39,7 @@ extern "C" {
        int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf_callaux_t *bca, bcf_callret1_t *r);
        int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call);
        int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP);
+       int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref);
 
 #ifdef __cplusplus
 }
diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c
new file mode 100644 (file)
index 0000000..7359cc8
--- /dev/null
@@ -0,0 +1,262 @@
+#include <assert.h>
+#include "bam.h"
+#include "bam2bcf.h"
+#include "ksort.h"
+#include "kaln.h"
+
+#define INDEL_DEBUG
+
+#define MINUS_CONST 0x10000000
+#define INDEL_WINDOW_SIZE 50
+#define INDEL_BAD_SCORE 10000
+
+static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
+{
+       int k, x = c->pos, y = 0, last_y = 0;
+       *_tpos = c->pos;
+       for (k = 0; k < c->n_cigar; ++k) {
+               int op = cigar[k] & BAM_CIGAR_MASK;
+               int l = cigar[k] >> BAM_CIGAR_SHIFT;
+               if (op == BAM_CMATCH) {
+                       if (c->pos > tpos) return y;
+                       if (x + l > tpos) {
+                               *_tpos = tpos;
+                               return y + (tpos - x);
+                       }
+                       x += l; y += l;
+                       last_y = y;
+               } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
+               else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
+                       if (x + l > tpos) {
+                               *_tpos = is_left? x : x + l;
+                               return y;
+                       }
+                       x += l;
+               }
+       }
+       *_tpos = x;
+       return last_y;
+}
+// l is the relative gap length and l_run is the length of the homopolymer on the reference
+static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run)
+{
+       int q, qh;
+       q = bca->openQ + bca->extQ * (abs(l) - 1);
+       qh = l_run >= 3? (int)(bca->tandemQ * (double)l / l_run + .499) : 1000;
+       return q < qh? q : qh;
+}
+
+int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
+{
+       extern void ks_introsort_uint32_t(int, uint32_t*);
+       int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score, N, K, l_run, ref_type;
+       char *inscns = 0, *ref2, *query;
+       if (ref == 0 || bca == 0) return -1;
+       // determine if there is a gap
+       for (s = 0; s < n; ++s) {
+               for (i = 0; i < n_plp[s]; ++i)
+                       if (plp[s][i].indel != 0) break;
+               if (i < n_plp[s]) break;
+       }
+       if (s == n) return -1; // there is no indel at this position.
+       { // find out how many types of indels are present
+               int m;
+               uint32_t *aux;
+               aux = calloc(n + 1, 4);
+               m = max_rd_len = 0;
+               aux[m++] = MINUS_CONST; // zero indel is always a type
+               for (s = N = 0; s < n; ++s) {
+                       N += n_plp[s]; // N is the total number of reads
+                       for (i = 0; i < n_plp[s]; ++i) {
+                               const bam_pileup1_t *p = plp[s] + i;
+                               if (p->indel != 0)
+                                       aux[m++] = MINUS_CONST + p->indel;
+                               j = bam_cigar2qlen(&p->b->core, bam1_cigar(p->b));
+                               if (j > max_rd_len) max_rd_len = j;
+                       }
+               }
+               ks_introsort(uint32_t, m, aux);
+               // squeeze out identical types
+               for (i = 1, n_types = 1; i < m; ++i)
+                       if (aux[i] != aux[i-1]) ++n_types;
+               assert(n_types > 1); // there must at least one type of non-reference indel
+               types = (int*)calloc(n_types, sizeof(int));
+               t = 0;
+               types[t++] = aux[0] - MINUS_CONST; 
+               for (i = 1; i < m; ++i)
+                       if (aux[i] != aux[i-1])
+                               types[t++] = aux[i] - MINUS_CONST;
+               free(aux);
+               for (t = 0; t < n_types; ++t)
+                       if (types[t] == 0) break;
+               ref_type = t; // the index of the reference type (0)
+               assert(n_types < 64);
+       }
+       { // calculate left and right boundary
+               left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0;
+               right = pos + INDEL_WINDOW_SIZE;
+               if (types[0] < 0) right -= types[0];
+               // in case the alignments stand out the reference
+               for (i = pos; i < right; ++i)
+                       if (ref[i] == 0) break;
+               right = i;
+       }
+       { // the length of the homopolymer run around the current position
+               int c = bam_nt16_table[(int)ref[pos + 1]];
+               if (c == 15) l_run = 1;
+               else {
+                       for (i = pos + 2; ref[i]; ++i)
+                               if (bam_nt16_table[(int)ref[i]] != c) break;
+                       l_run = i;
+                       for (i = pos; i >= 0; --i)
+                               if (bam_nt16_table[(int)ref[i]] != c) break;
+                       l_run -= i + 1;
+               }
+       }
+       // construct the consensus sequence
+       max_ins = types[n_types - 1]; // max_ins is at least 0
+       if (max_ins > 0) {
+               int *inscns_aux = calloc(4 * n_types * max_ins, sizeof(int));
+               // count the number of occurrences of each base at each position for each type of insertion
+               for (t = 0; t < n_types; ++t) {
+                       if (types[t] > 0) {
+                               for (s = 0; s < n; ++s) {
+                                       for (i = 0; i < n_plp[s]; ++i) {
+                                               bam_pileup1_t *p = plp[s] + i;
+                                               if (p->indel == types[t]) {
+                                                       uint8_t *seq = bam1_seq(p->b);
+                                                       for (k = 1; k <= p->indel; ++k) {
+                                                               int c = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos + k)];
+                                                               if (c < 4) ++inscns_aux[(t*max_ins+(k-1))*4 + c];
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+               // use the majority rule to construct the consensus
+               inscns = calloc(n_types * max_ins, 1);
+               for (t = 0; t < n_types; ++t) {
+                       for (j = 0; j < types[t]; ++j) {
+                               int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*4];
+                               for (k = 0; k < 4; ++k)
+                                       if (ia[k] > max)
+                                               max = ia[k], max_k = k;
+                               inscns[t*max_ins + j] = max? max_k : 4;
+                       }
+               }
+               free(inscns_aux);
+       }
+       // compute the likelihood given each type of indel for each read
+       ref2  = calloc(right - left + max_ins + 2, 1);
+       query = calloc(right - left + max_rd_len + max_ins + 2, 1);
+       score = calloc(N * n_types, sizeof(int));
+       for (t = 0; t < n_types; ++t) {
+               int l;
+               ka_param2_t ap = ka_param2_qual;
+               ap.band_width = abs(types[t]) + 3;
+               // write ref2
+               for (k = 0, j = left; j <= pos; ++j)
+                       ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
+               if (types[t] <= 0) j += -types[t];
+               else for (l = 0; l < types[t]; ++l)
+                                ref2[k++] = inscns[t*max_ins + l];
+               if (types[0] < 0) { // mask deleted sequences to avoid a particular error in the model.
+                       int jj, tmp = types[t] >= 0? -types[0] : -types[0] + types[t];
+                       for (jj = 0; jj < tmp && j < right && ref[j]; ++jj, ++j)
+                               ref2[k++] = 4;
+               }
+               for (; j < right && ref[j]; ++j)
+                       ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
+               if (j < right) right = j;
+               // align each read to ref2
+               for (s = K = 0; s < n; ++s) {
+                       for (i = 0; i < n_plp[s]; ++i, ++K) {
+                               bam_pileup1_t *p = plp[s] + i;
+                               int qbeg, qend, tbeg, tend, sc;
+                               uint8_t *seq = bam1_seq(p->b);
+                               // determine the start and end of sequences for alignment
+                               qbeg = tpos2qpos(&p->b->core, bam1_cigar(p->b), left,  0, &tbeg);
+                               qend = tpos2qpos(&p->b->core, bam1_cigar(p->b), right, 1, &tend);
+                               assert(tbeg >= left);
+                               // write the query sequence
+                               for (l = qbeg; l < qend; ++l)
+                                       query[l - qbeg] = bam_nt16_nt4_table[bam1_seqi(seq, l)];
+                               // do alignment; this takes most of computing time for indel calling
+#ifdef INDEL_DEBUG
+                               for (sc = 0; sc < tend - tbeg + types[t]; ++sc)
+                                       fputc("ACGTN"[(int)ref2[tbeg-left+sc]], stderr);
+                               fputc('\n', stderr);
+                               for (sc = 0; sc < qend - qbeg; ++sc) fputc("ACGTN"[(int)query[qbeg + sc]], stderr);
+                               fputc('\n', stderr);                            
+#endif
+                               sc = ka_global_score((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
+                                                                        (uint8_t*)query + qbeg, qend - qbeg, &ap);
+#ifdef INDEL_DEBUG
+                               fprintf(stderr, "pos=%d type=%d read=%d:%d name=%s score=%d\n", pos, types[t], s, i, bam1_qname(p->b), sc);
+#endif
+                               score[K*n_types + t] = -sc;
+                       }
+               }
+       }
+       free(ref2); free(query);
+       { // choose the top 4 indel types; reference must be included
+       }
+       { // compute indelQ
+               int *sc, tmp, *sumq;
+               sc   = alloca(n_types * sizeof(int));
+               sumq = alloca(n_types * sizeof(int));
+               memset(sumq, 0, sizeof(int) * n_types);
+               for (s = K = 0; s < n; ++s) {
+                       for (i = 0; i < n_plp[s]; ++i, ++K) {
+                               bam_pileup1_t *p = plp[s] + i;
+                               int *sct = &score[K*n_types], indelQ;
+                               for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
+                               for (t = 1; t < n_types; ++t) // insertion sort
+                                       for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
+                                               tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
+                               /* errmod_cal() assumes that if the call is wrong, the
+                                * likelihoods of other events are equal. This is about
+                                * right for substitutions, but is not desired for
+                                * indels. To reuse errmod_cal(), I have to make
+                                * compromise for multi-allelic indels.
+                                */
+                               if ((sc[0]&0x3f) == ref_type) {
+                                       indelQ = (sc[1]>>6) - (sc[0]>>6);
+                                       tmp = est_seqQ(bca, types[sc[1]&0x3f], l_run);
+                               } else {
+                                       for (t = 0; t < n_types; ++t) // look for the reference type
+                                               if ((sc[t]&0x3f) == ref_type) break;
+                                       indelQ = (sc[t]>>6) - (sc[0]>>6);
+                                       tmp = est_seqQ(bca, types[sc[0]&0x3f], l_run);
+                               }
+                               if (indelQ > tmp) indelQ = tmp;
+                               if (indelQ > p->b->core.qual) indelQ = p->b->core.qual;
+                               if (indelQ > bca->capQ) indelQ = bca->capQ;
+                               p->aux = (sc[0]&0x3f)<<8 | indelQ;
+                               sumq[sc[0]&0x3f] += indelQ;
+                       }
+               }
+               // determine bca->indel_types[]
+               for (t = 0; t < n_types; ++t)
+                       sumq[t] = sumq[t]<<6 | t;
+               for (t = 1; t < n_types; ++t) // insertion sort
+                       for (j = t; j > 0 && sumq[j] < sumq[j-1]; --j)
+                               tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp;
+               for (t = 0; t < n_types; ++t) // look for the reference type
+                       if ((sumq[t]&0x3f) == ref_type) break;
+               if (t) { // then move the reference type to the first
+                       tmp = sumq[t];
+                       for (; t > 0; --t) sumq[t] = sumq[t-1];
+                       sumq[0] = tmp;
+               }
+               for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
+               for (t = 0; t < 4 && t < n_types; ++t)
+                       bca->indel_types[t] = types[sumq[t]&0x3f];
+       }
+       // FIXME: to set the inserted sequence
+       free(score);
+       // free
+       free(types); free(inscns);
+       return 0;
+}
index a3e6aeb614927733676b2b25e4592786cb14ef2e..1b15aaec8b274e5ba92af6470c71a2f913f2a480 100644 (file)
@@ -729,6 +729,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                        bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, (conf->flag&MPLP_FMT_SP));
                        bcf_write(bp, bh, b);
                        bcf_destroy(b);
+//                     bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref);
                } else {
                        printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
                        for (i = 0; i < n; ++i) {
diff --git a/kaln.c b/kaln.c
index 4cba98d3ae4bcc9d9e3ab9a30d537412dbd0f2ed..01efe2e69300ea520a0a4de3565fa7241528f581 100644 (file)
--- a/kaln.c
+++ b/kaln.c
@@ -73,9 +73,19 @@ int aln_sm_blast[] = {
        -2, -2, -2, -2, -2
 };
 
+int aln_sm_qual[] = {
+         0, -23, -23, -23, 0,
+       -23,   0, -23, -23, 0,
+       -23, -23,   0, -23, 0,
+       -23, -23, -23,   0, 0,
+         0,   0,   0,   0, 0
+};
+
 ka_param_t ka_param_blast = {  5,  2,   5, 2, aln_sm_blast, 5, 50 };
 ka_param_t ka_param_aa2aa = { 10,  2,  10, 2, aln_sm_blosum62, 22, 50 };
 
+ka_param2_t ka_param2_qual  = { 35, 5, 35, 5, 35, 5, 0, 0, aln_sm_qual, 5, 50 };
+
 static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
 {
        int i, n;
@@ -176,7 +186,7 @@ static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
 }
 
 typedef struct {
-       uint8_t Mt:3, It:2, Dt:2;
+       uint8_t Mt:3, It:2, Dt:3;
 } dpcell_t;
 
 typedef struct {
@@ -208,7 +218,7 @@ uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const
        score_matrix = ap->matrix;
        N_MATRIX_ROW = ap->row;
 
-       *n_cigar = 0;
+       if (n_cigar) *n_cigar = 0;
        if (len1 == 0 || len2 == 0) return 0;
 
        /* calculate b1 and b2 */
@@ -370,3 +380,107 @@ uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const
 
        return cigar;
 }
+
+typedef struct {
+       int M, I, D;
+} score_aux_t;
+
+#define MINUS_INF -0x40000000
+
+// matrix: len2 rows and len1 columns
+int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap)
+{
+       
+#define __score_aux(_p, _q0, _sc, _io, _ie, _do, _de) {                                        \
+               int t1, t2;                                                                                                             \
+               score_aux_t *_q;                                                                                                \
+               _q = _q0;                                                                                                               \
+               _p->M = _q->M >= _q->I? _q->M : _q->I;                                                  \
+               _p->M = _p->M >= _q->D? _p->M : _q->D;                                                  \
+               _p->M += (_sc);                                                                                                 \
+               ++_q;      t1 = _q->M - _io - _ie; t2 = _q->I - _ie; _p->I = t1 >= t2? t1 : t2; \
+               _q = _p-1; t1 = _q->M - _do - _de; t2 = _q->D - _de; _p->D = t1 >= t2? t1 : t2; \
+       }
+
+       int i, j, bw, scmat_size = ap->row, *scmat = ap->matrix, ret;
+       const uint8_t *seq1, *seq2;
+       score_aux_t *curr, *last, *swap;
+       bw = abs(len1 - len2) + ap->band_width;
+       i = len1 > len2? len1 : len2;
+       if (bw > i + 1) bw = i + 1;
+       seq1 = _seq1 - 1; seq2 = _seq2 - 1;
+       curr = calloc(len1 + 2, sizeof(score_aux_t));
+       last = calloc(len1 + 2, sizeof(score_aux_t));
+       { // the zero-th row
+               int x, end = len1;
+               score_aux_t *p;
+               j = 0;
+               x = j + bw; end = len1 < x? len1 : x; // band end
+               p = curr;
+               p->M = 0; p->I = p->D = MINUS_INF;
+               for (i = 1, p = &curr[1]; i <= end; ++i, ++p)
+                       p->M = p->I = MINUS_INF, p->D = -(ap->edo + ap->ede * i);
+               p->M = p->I = p->D = MINUS_INF;
+               swap = curr; curr = last; last = swap;
+       }
+       for (j = 1; j < len2; ++j) {
+               int x, beg = 0, end = len1, *scrow, col_end;
+               score_aux_t *p;
+               x = j - bw; beg =    0 > x?    0 : x; // band start
+               x = j + bw; end = len1 < x? len1 : x; // band end
+               if (beg == 0) { // from zero-th column
+                       p = curr;
+                       p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
+                       ++beg; // then beg = 1
+               }
+               scrow = scmat + seq2[j] * scmat_size;
+               if (end == len1) col_end = 1, --end;
+               else col_end = 0;
+               for (i = beg, p = &curr[beg]; i <= end; ++i, ++p)
+                       __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->ido, ap->ide);
+               if (col_end) {
+                       __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->ido, ap->ide);
+                       ++p;
+               }
+               p->M = p->I = p->D = MINUS_INF;
+//             for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
+               swap = curr; curr = last; last = swap;
+       }
+       { // the last row
+               int x, beg = 0, *scrow;
+               score_aux_t *p;
+               j = len2;
+               x = j - bw; beg = 0 > x?    0 : x; // band start
+               if (beg == 0) { // from zero-th column
+                       p = curr;
+                       p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
+                       ++beg; // then beg = 1
+               }
+               scrow = scmat + seq2[j] * scmat_size;
+               for (i = beg, p = &curr[beg]; i < len1; ++i, ++p)
+                       __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->edo, ap->ede);
+               __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->edo, ap->ede);
+//             for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
+       }
+       ret = curr[len1].M >= curr[len1].I? curr[len1].M : curr[len1].I;
+       ret = ret >= curr[len1].D? ret : curr[len1].D;
+       free(curr); free(last);
+       return ret;
+}
+
+#ifdef _MAIN
+int main(int argc, char *argv[])
+{
+//     int len1 = 35, len2 = 35;
+//     uint8_t *seq1 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\0\1";
+//     uint8_t *seq2 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\1\0";
+       int len1 = 4, len2 = 4;
+       uint8_t *seq1 = (uint8_t*)"\1\0\0\1";
+       uint8_t *seq2 = (uint8_t*)"\1\0\1\0";
+       int sc;
+//     ka_global_core(seq1, 2, seq2, 1, &ka_param_qual, &sc, 0);
+       sc = ka_global_score(seq1, len1, seq2, len2, &ka_param2_qual);
+       printf("%d\n", sc);
+       return 0;
+}
+#endif
diff --git a/kaln.h b/kaln.h
index b25dccb32a978ff50659685e84b62f1ed442dffb..1ece132755e51212ab1aee8e56b7b316484e6d37 100644 (file)
--- a/kaln.h
+++ b/kaln.h
@@ -41,16 +41,27 @@ typedef struct {
        int band_width;
 } ka_param_t;
 
+typedef struct {
+       int iio, iie, ido, ide;
+       int eio, eie, edo, ede;
+       int *matrix;
+       int row;
+       int band_width;
+} ka_param2_t;
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
        uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap,
                                                         int *_score, int *n_cigar);
+       int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap);
 #ifdef __cplusplus
 }
 #endif
 
 extern ka_param_t ka_param_blast; /* = { 5, 2, 5, 2, aln_sm_blast, 5, 50 }; */
+extern ka_param_t ka_param_qual; // only use this for global alignment!!!
+extern ka_param2_t ka_param2_qual; // only use this for global alignment!!!
 
 #endif