From 67184476fcd4a7aba449556a2333586465fd6102 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 2 Aug 2010 20:49:35 +0000 Subject: [PATCH] * samtools-0.1.8-9 (r649) * lossless representation of PL in BCF output --- Makefile | 2 +- bam2bcf.c | 28 ++++++++++++++++++++-------- bam2bcf.h | 2 +- bamtk.c | 2 +- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 4f89dc1..7d98b24 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC= gcc -CFLAGS= -g -Wall #-O2 #-m64 #-arch ppc +CFLAGS= -g -Wall -O2 #-m64 #-arch ppc DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -D_CURSES_LIB=1 KNETFILE_O= knetfile.o LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ diff --git a/bam2bcf.c b/bam2bcf.c index a919cbf..a78e7d2 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -168,13 +168,25 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, for (i = 1; i < 4; ++i) // insertion sort for (j = i; j > 0 && sum[j] < sum[j-1]; --j) tmp = sum[j], sum[j] = sum[j-1], sum[j-1] = tmp; - call->a[0] = sum[3]&3; call->a[1] = sum[2]&3; call->a[2] = -1; // set the reference allele and alternative allele(s) - if (ref4 != call->a[0]) { // ref is not the best - if (ref4 < 4) { // not ambiguous - if (ref4 == call->a[1]) tmp = call->a[0], call->a[0] = call->a[1], call->a[1] = tmp; // switch 0 and 1 - else call->a[2] = call->a[1], call->a[1] = call->a[0], call->a[0] = ref4; // triallele + for (i = 0; i < 4; ++i) call->a[i] = -1; + call->unseen = -1; + if (ref4 < 4) { + call->a[0] = ref4; + for (i = 3, j = 1; i >= 0; --i) { + if ((sum[i]&3) != ref4) { + if (sum[i]>>2 != 0) call->a[j++] = sum[i]&3; + else break; + } } + if (j < 4 && i >= 0) call->unseen = j, call->a[j++] = sum[i]&3; + call->n_alleles = j; + } else { + for (i = 3, j = 0; i >= 0; --i) + if (sum[i]>>2 != 0) call->a[j++] = sum[i]&3; + else break; + if (j < 4 && i >= 0) call->unseen = j, call->a[j++] = sum[i]&3; + call->n_alleles = j; } // set the PL array if (call->n < n) { @@ -184,7 +196,6 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, { int x, g[6], z; double sum_min = 0.; - call->n_alleles = call->a[2] < 0? 2 : 3; x = call->n_alleles * (call->n_alleles + 1) / 2; // get the possible genotypes for (i = z = 0; i < call->n_alleles; ++i) @@ -226,11 +237,12 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b) for (i = beg; i < 4; ++i) { if (bc->a[i] < 0) break; if (i > beg) kputc(',', &s); - kputc("ACGT"[bc->a[i]], &s); +// kputc(bc->unseen == i && i != 3? 'X' : "ACGT"[bc->a[i]], &s); + kputc(bc->unseen == i? 'X' : "ACGT"[bc->a[i]], &s); } kputc('\0', &s); kputc('\0', &s); - kputs("MQ=", &s); kputw(bc->rmsQ, &s); kputc('\0', &s); + kputs("MQ=", &s); kputw(bc->rmsQ, &s); kputs(";DP=", &s); kputw(bc->depth, &s); kputc('\0', &s); kputs("PL", &s); kputc('\0', &s); b->m_str = s.m; b->str = s.s; b->l_str = s.l; bcf_sync(bc->n, b); diff --git a/bam2bcf.h b/bam2bcf.h index 3d9a804..b96488b 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -15,7 +15,7 @@ typedef struct { typedef struct { int a[4]; // alleles: ref, alt, alt2, alt3 - int n, n_alleles, shift, ori_ref; + int n, n_alleles, shift, ori_ref, unseen; int depth, rmsQ; uint8_t *PL; } bcf_call_t; diff --git a/bamtk.c b/bamtk.c index 7b64b8f..e3b71fe 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.8-8 (r644)" +#define PACKAGE_VERSION "0.1.8-9 (r649)" #endif int bam_taf2baf(int argc, char *argv[]); -- 2.39.2