From 896d270d34b71e0422ddca4a0d62136cb2e537be Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 29 Oct 2010 16:10:40 +0000 Subject: [PATCH] * samtools-0.1.9-1 (r786) * samtools: optionally perform exact test for each sample --- bam2bcf.c | 21 +++++++++++++++++++-- bam2bcf.h | 2 +- bam_plcmd.c | 13 ++++++++----- bamtk.c | 2 +- bcftools/bcf.c | 4 ++-- bcftools/vcf.c | 4 ++-- 6 files changed, 33 insertions(+), 13 deletions(-) diff --git a/bam2bcf.c b/bam2bcf.c index 88222e7..509761d 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -154,7 +154,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, return 0; } -int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr) +int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP) { extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); kstring_t s; @@ -182,6 +182,7 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bc kputs("PL", &s); if (bcr) { kputs(":DP", &s); + if (is_SP) kputs(":SP", &s); } kputc('\0', &s); b->m_str = s.m; b->str = s.s; b->l_str = s.l; @@ -189,8 +190,24 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bc memcpy(b->gi[0].data, bc->PL, b->gi[0].len * bc->n); if (bcr) { uint16_t *dp = (uint16_t*)b->gi[1].data; + uint8_t *sp = is_SP? b->gi[2].data : 0; for (i = 0; i < bc->n; ++i) { - dp[i] = bcr[i].depth < 0xffff? bcr[i].depth : 0xffff; + bcf_callret1_t *p = bcr + i; + dp[i] = p->depth < 0xffff? p->depth : 0xffff; + if (is_SP) { + if (p->anno[0] + p->anno[1] < 2 || p->anno[2] + p->anno[3] < 2 + || p->anno[0] + p->anno[2] < 2 || p->anno[1] + p->anno[3] < 2) + { + sp[i] = 0; + } else { + double left, right, two; + int x; + kt_fisher_exact(p->anno[0], p->anno[1], p->anno[2], p->anno[3], &left, &right, &two); + x = (int)(-4.343 * log(two) + .499); + if (x > 255) x = 255; + sp[i] = x; + } + } } } return 0; diff --git a/bam2bcf.h b/bam2bcf.h index f96a7e3..4de743a 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -28,7 +28,7 @@ extern "C" { void bcf_call_destroy(bcf_callaux_t *bca); int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf_callaux_t *bca, bcf_callret1_t *r); int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call); - int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr); + int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP); #ifdef __cplusplus } diff --git a/bam_plcmd.c b/bam_plcmd.c index 75f5691..c23726e 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -528,7 +528,8 @@ int bam_pileup(int argc, char *argv[]) #define MPLP_NO_COMP 0x20 #define MPLP_NO_ORPHAN 0x40 #define MPLP_REALN 0x80 -#define MPLP_SAMINFO 0x100 +#define MPLP_FMT_DP 0x100 +#define MPLP_FMT_SP 0x200 typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres; @@ -715,7 +716,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i); bcf_call_combine(gplp.n, bcr, ref16, &bc); - bcf_call2bcf(tid, pos, &bc, b, (conf->flag&MPLP_SAMINFO)? bcr : 0); + bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, (conf->flag&MPLP_FMT_SP)); bcf_write(bp, bh, b); bcf_destroy(b); } else { @@ -771,7 +772,7 @@ int bam_mpileup(int argc, char *argv[]) mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; - while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BI")) >= 0) { + while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDS")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); @@ -785,7 +786,8 @@ int bam_mpileup(int argc, char *argv[]) case 'B': mplp.flag &= ~MPLP_REALN & ~MPLP_NO_ORPHAN; break; case 'O': mplp.flag |= MPLP_NO_ORPHAN; break; case 'R': mplp.flag |= MPLP_REALN; break; - case 'I': mplp.flag |= MPLP_SAMINFO; break; + case 'D': mplp.flag |= MPLP_FMT_DP; break; + case 'S': mplp.flag |= MPLP_FMT_SP; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; @@ -804,7 +806,8 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); fprintf(stderr, " -B disable BAQ computation\n"); - fprintf(stderr, " -I additional individual information (DP and ST)\n"); + fprintf(stderr, " -D output per-sample DP\n"); + fprintf(stderr, " -S output per-sample SP (strand bias P-value, slow)\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; diff --git a/bamtk.c b/bamtk.c index 082767c..741afee 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.9 (r783)" +#define PACKAGE_VERSION "0.1.9-1 (r786)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bcftools/bcf.c b/bcftools/bcf.c index 035eddc..494dccc 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -135,7 +135,7 @@ int bcf_sync(bcf1_t *b) } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2)) { b->gi[i].len = 2; } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2) - || b->gi[i].fmt == bcf_str2int("ST", 2)) + || b->gi[i].fmt == bcf_str2int("SP", 2)) { b->gi[i].len = 1; } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { @@ -238,7 +238,7 @@ void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s) } } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { kputw(((uint16_t*)b->gi[i].data)[j], s); - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("ST", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) { kputw(((uint8_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("GT", 2)) { int y = ((uint8_t*)b->gi[i].data)[j]; diff --git a/bcftools/vcf.c b/bcftools/vcf.c index ebca869..9b661ff 100644 --- a/bcftools/vcf.c +++ b/bcftools/vcf.c @@ -156,7 +156,7 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b) for (i = 0; i < b->n_gi; ++i) { if (b->gi[i].fmt == bcf_str2int("GT", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = 1<<7; - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = 0; } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { ((uint16_t*)b->gi[i].data)[k-9] = 0; @@ -173,7 +173,7 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b) for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) { if (b->gi[i].fmt == bcf_str2int("GT", 2)) { ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6; - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) { double _x = strtod(q, &q); int x = (int)(_x + .499); if (x > 255) x = 255; -- 2.39.2