From: Heng Li Date: Fri, 25 Feb 2011 21:40:09 +0000 (+0000) Subject: * change the order of PL/GL according to the latest VCF spec X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=27eec65490fb3461f0af054868d9a0949e1843ed * change the order of PL/GL according to the latest VCF spec * change the type of SP to int32_t --- diff --git a/bam2bcf.c b/bam2bcf.c index 98acb5d..bd5503d 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -144,8 +144,8 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, x = call->n_alleles * (call->n_alleles + 1) / 2; // get the possible genotypes for (i = z = 0; i < call->n_alleles; ++i) - for (j = i; j < call->n_alleles; ++j) - g[z++] = call->a[i] * 5 + call->a[j]; + for (j = 0; j <= i; ++j) + g[z++] = call->a[j] * 5 + call->a[i]; for (i = 0; i < n; ++i) { uint8_t *PL = call->PL + x * i; const bcf_callret1_t *r = calls + i; diff --git a/bamtk.c b/bamtk.c index 096cd95..03e413d 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.12-11 (r896:110)" +#define PACKAGE_VERSION "0.1.12-11 (r921:118)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bcftools/bcf.c b/bcftools/bcf.c index 88ba1ba..080b6fe 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -136,10 +136,10 @@ int bcf_sync(bcf1_t *b) b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2; } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2)) { b->gi[i].len = 2; - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2) - || b->gi[i].fmt == bcf_str2int("SP", 2)) - { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) { b->gi[i].len = 1; + } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { + b->gi[i].len = 4; } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2 * 4; } @@ -240,8 +240,10 @@ void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s) } } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { kputw(((uint16_t*)b->gi[i].data)[j], s); - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { kputw(((uint8_t*)b->gi[i].data)[j], s); + } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { + kputw(((int32_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("GT", 2)) { int y = ((uint8_t*)b->gi[i].data)[j]; if (y>>7&1) { diff --git a/bcftools/bcf.tex b/bcftools/bcf.tex index 53bb8b0..8957364 100644 --- a/bcftools/bcf.tex +++ b/bcftools/bcf.tex @@ -48,8 +48,8 @@ {\tt PL} & {\tt uint8\_t[n*G]} & {Phred-scaled likelihood of data}\\ {\tt PS} & {\tt uint32\_t[n]} & {Phase set}\\ %{\tt SP} & {\tt uint8\_t[n]} & {Strand bias P-value (bcftools only)}\\ -\emph{Integer} & {\tt int32\_t[n*X]} & {Fix-sized custom Integer; $X$ can be $1$, $A$ or $G$.}\\ -\emph{Numeric} & {\tt double[n*X]} & {Fix-sized custom Numeric; $X$ can be $1$, $A$ or $G$.}\\ +\emph{Integer} & {\tt int32\_t[n*X]} & {Fix-sized custom Integer; $X$ defined in the header}\\ +\emph{Numeric} & {\tt double[n*X]} & {Fix-sized custom Numeric}\\ \emph{String} & {\tt uint32\_t+char*} & {\tt NULL} padded concat. strings (int equals to the length) \\ \hline \end{tabular} diff --git a/bcftools/bcf2qcall.c b/bcftools/bcf2qcall.c index 8634c9e..a86bac2 100644 --- a/bcftools/bcf2qcall.c +++ b/bcftools/bcf2qcall.c @@ -77,8 +77,8 @@ int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b) for (k = j = 0; k < 4; ++k) { for (l = k; l < 4; ++l) { int t, x = map[k], y = map[l]; - if (x > y) t = x, x = y, y = t; - g[j++] = p[x * b->n_alleles - x * (x-1) / 2 + (y - x)]; + if (x > y) t = x, x = y, y = t; // swap + g[j++] = p[y * (y+1) / 2 + x]; } } printf("%s\t%d\t%c", h->ns[b->tid], b->pos+1, *b->ref); diff --git a/bcftools/bcfutils.c b/bcftools/bcfutils.c index 8a8e0c9..ae7ec0f 100644 --- a/bcftools/bcfutils.c +++ b/bcftools/bcfutils.c @@ -63,8 +63,9 @@ int bcf_str2id_add(void *_hash, const char *str) int bcf_shrink_alt(bcf1_t *b, int n) { char *p; - int i, j, k, *z, n_smpl = b->n_smpl; + int i, j, k, n_smpl = b->n_smpl; if (b->n_alleles <= n) return -1; + // update ALT if (n > 1) { for (p = b->alt, k = 1; *p; ++p) if (*p == ',' && ++k == n) break; @@ -73,10 +74,7 @@ int bcf_shrink_alt(bcf1_t *b, int n) ++p; memmove(p, b->flt, b->str + b->l_str - b->flt); b->l_str -= b->flt - p; - z = alloca(sizeof(int) / 2 * n * (n+1)); - for (i = k = 0; i < n; ++i) - for (j = 0; j < n - i; ++j) - z[k++] = i * b->n_alleles + j; + // update PL for (i = 0; i < b->n_gi; ++i) { bcf_ginfo_t *g = b->gi + i; if (g->fmt == bcf_str2int("PL", 2)) { @@ -85,7 +83,7 @@ int bcf_shrink_alt(bcf1_t *b, int n) g->len = n * (n + 1) / 2; for (l = k = 0; l < n_smpl; ++l) { uint8_t *dl = d + l * x; - for (j = 0; j < g->len; ++j) d[k++] = dl[z[j]]; + for (j = 0; j < g->len; ++j) d[k++] = dl[j]; } } // FIXME: to add GL } @@ -157,7 +155,8 @@ int bcf_anno_max(bcf1_t *b) { int k, max_gq, max_sp, n_het; kstring_t str; - uint8_t *gt, *gq, *sp; + uint8_t *gt, *gq; + int32_t *sp; max_gq = max_sp = n_het = 0; gt = locate_field(b, "GT", 2); if (gt == 0) return -1; diff --git a/bcftools/ld.c b/bcftools/ld.c index dc84d4b..11474ed 100644 --- a/bcftools/ld.c +++ b/bcftools/ld.c @@ -70,7 +70,7 @@ double bcf_ld_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]) for (i = 0; i < n_smpl; ++i) { const uint8_t *pi = PL[j] + i * PL_len[j]; double *p = pdg[j] + i * 3; - p[0] = g_q2p[pi[b[j]->n_alleles]]; p[1] = g_q2p[pi[1]]; p[2] = g_q2p[pi[0]]; + p[0] = g_q2p[pi[2]]; p[1] = g_q2p[pi[1]]; p[2] = g_q2p[pi[0]]; } } // iteration diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 9d418c5..193c4a0 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -168,18 +168,16 @@ void bcf_p1_destroy(bcf_p1aux_t *ma) static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma) { - int i, j, k; + int i, j; long *p, tmp; p = alloca(b->n_alleles * sizeof(long)); memset(p, 0, sizeof(long) * b->n_alleles); for (j = 0; j < ma->n; ++j) { const uint8_t *pi = ma->PL + j * ma->PL_len; double *pdg = ma->pdg + j * 3; - pdg[0] = ma->q2p[pi[b->n_alleles]]; pdg[1] = ma->q2p[pi[1]]; pdg[2] = ma->q2p[pi[0]]; - for (i = k = 0; i < b->n_alleles; ++i) { - p[i] += (int)pi[k]; - k += b->n_alleles - i; - } + pdg[0] = ma->q2p[pi[2]]; pdg[1] = ma->q2p[pi[1]]; pdg[2] = ma->q2p[pi[0]]; + for (i = 0; i < b->n_alleles; ++i) + p[i] += (int)pi[(i+1)*(i+2)/2-1]; } for (i = 0; i < b->n_alleles; ++i) p[i] = p[i]<<4 | i; for (i = 1; i < b->n_alleles; ++i) // insertion sort diff --git a/bcftools/vcf.c b/bcftools/vcf.c index d8cb1e6..81a38ea 100644 --- a/bcftools/vcf.c +++ b/bcftools/vcf.c @@ -182,8 +182,10 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b) for (i = 0; i < b->n_gi; ++i) { if (b->gi[i].fmt == bcf_str2int("GT", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = 1<<7; - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = 0; + } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { + ((int32_t*)b->gi[i].data)[k-9] = 0; } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { ((uint16_t*)b->gi[i].data)[k-9] = 0; } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) { @@ -199,11 +201,15 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b) for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) { if (b->gi[i].fmt == bcf_str2int("GT", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6; - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { double _x = strtod(q, &q); int x = (int)(_x + .499); if (x > 255) x = 255; ((uint8_t*)b->gi[i].data)[k-9] = x; + } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) { + int x = strtol(q, &q, 10); + if (x > 0xffff) x = 0xffff; + ((uint32_t*)b->gi[i].data)[k-9] = x; } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { int x = strtol(q, &q, 10); if (x > 0xffff) x = 0xffff; diff --git a/cut_target.c b/cut_target.c index ee687fb..26f434f 100644 --- a/cut_target.c +++ b/cut_target.c @@ -46,6 +46,8 @@ static uint16_t gencns(ct_t *g, int n, const bam_pileup1_t *plp) b = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos)]; if (b > 3) continue; q = baseQ < p->b->core.qual? baseQ : p->b->core.qual; + if (q < 4) q = 4; + if (q > 63) q = 63; g->bases[k++] = q<<5 | bam1_strand(p->b)<<4 | b; } if (k == 0) return 0;