]> git.donarmstrong.com Git - samtools.git/commitdiff
* samtools-0.1.8-9 (r649)
authorHeng Li <lh3@live.co.uk>
Mon, 2 Aug 2010 20:49:35 +0000 (20:49 +0000)
committerHeng Li <lh3@live.co.uk>
Mon, 2 Aug 2010 20:49:35 +0000 (20:49 +0000)
 * lossless representation of PL in BCF output

Makefile
bam2bcf.c
bam2bcf.h
bamtk.c

index 4f89dc1e47d80243d591ca67048d25ef6832e4f8..7d98b247a79341885d518bcad46d51b08e9cba57 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC=                    gcc
-CFLAGS=                -g -Wall #-O2 #-m64 #-arch ppc
+CFLAGS=                -g -Wall -O2 #-m64 #-arch ppc
 DFLAGS=                -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -D_CURSES_LIB=1
 KNETFILE_O=    knetfile.o
 LOBJS=         bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
index a919cbf774e7325360e75c8c96b85a36e83ec142..a78e7d282c8ff2280a819ca32570a107a076d32a 100644 (file)
--- a/bam2bcf.c
+++ b/bam2bcf.c
@@ -168,13 +168,25 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/,
        for (i = 1; i < 4; ++i) // insertion sort
                for (j = i; j > 0 && sum[j] < sum[j-1]; --j)
                        tmp = sum[j], sum[j] = sum[j-1], sum[j-1] = tmp;
-       call->a[0] = sum[3]&3; call->a[1] = sum[2]&3; call->a[2] = -1;
        // set the reference allele and alternative allele(s)
-       if (ref4 != call->a[0]) { // ref is not the best
-               if (ref4 < 4) { // not ambiguous
-                       if (ref4 == call->a[1]) tmp = call->a[0], call->a[0] = call->a[1], call->a[1] = tmp; // switch 0 and 1
-                       else call->a[2] = call->a[1], call->a[1] = call->a[0], call->a[0] = ref4; // triallele
+       for (i = 0; i < 4; ++i) call->a[i] = -1;
+       call->unseen = -1;
+       if (ref4 < 4) {
+               call->a[0] = ref4;
+               for (i = 3, j = 1; i >= 0; --i) {
+                       if ((sum[i]&3) != ref4) {
+                               if (sum[i]>>2 != 0) call->a[j++] = sum[i]&3;
+                               else break;
+                       }
                }
+               if (j < 4 && i >= 0) call->unseen = j, call->a[j++] = sum[i]&3;
+               call->n_alleles = j;
+       } else {
+               for (i = 3, j = 0; i >= 0; --i)
+                       if (sum[i]>>2 != 0) call->a[j++] = sum[i]&3;
+                       else break;
+               if (j < 4 && i >= 0) call->unseen = j, call->a[j++] = sum[i]&3;
+               call->n_alleles = j;
        }
        // set the PL array
        if (call->n < n) {
@@ -184,7 +196,6 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/,
        {
                int x, g[6], z;
                double sum_min = 0.;
-               call->n_alleles = call->a[2] < 0? 2 : 3;
                x = call->n_alleles * (call->n_alleles + 1) / 2;
                // get the possible genotypes
                for (i = z = 0; i < call->n_alleles; ++i)
@@ -226,11 +237,12 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b)
        for (i = beg; i < 4; ++i) {
                if (bc->a[i] < 0) break;
                if (i > beg) kputc(',', &s);
-               kputc("ACGT"[bc->a[i]], &s);
+//             kputc(bc->unseen == i && i != 3? 'X' : "ACGT"[bc->a[i]], &s);
+               kputc(bc->unseen == i? 'X' : "ACGT"[bc->a[i]], &s);
        }
        kputc('\0', &s);
        kputc('\0', &s);
-       kputs("MQ=", &s); kputw(bc->rmsQ, &s); kputc('\0', &s);
+       kputs("MQ=", &s); kputw(bc->rmsQ, &s); kputs(";DP=", &s); kputw(bc->depth, &s); kputc('\0', &s);
        kputs("PL", &s); kputc('\0', &s);
        b->m_str = s.m; b->str = s.s; b->l_str = s.l;
        bcf_sync(bc->n, b);
index 3d9a8043a0b65bea590643b3ac34d9c07b214b89..b96488b1ee29ed90b99bdad59f11e941a4c14cb7 100644 (file)
--- a/bam2bcf.h
+++ b/bam2bcf.h
@@ -15,7 +15,7 @@ typedef struct {
 
 typedef struct {
        int a[4]; // alleles: ref, alt, alt2, alt3
-       int n, n_alleles, shift, ori_ref;
+       int n, n_alleles, shift, ori_ref, unseen;
        int depth, rmsQ;
        uint8_t *PL;
 } bcf_call_t;
diff --git a/bamtk.c b/bamtk.c
index 7b64b8f3d0c79fb928facf6cc73cd69c94726de6..e3b71fea8c64e6675ed9797eaf9ba2375940bacb 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -9,7 +9,7 @@
 #endif
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.8-8 (r644)"
+#define PACKAGE_VERSION "0.1.8-9 (r649)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);