From f221655a2515b417a545caece40b6985e927fb83 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 29 Oct 2010 13:42:25 +0000 Subject: [PATCH] Optionally output "DP", the individual read depth --- bam2bcf.c | 15 +++++++++++++-- bam2bcf.h | 2 +- bam_plcmd.c | 7 +++++-- bcftools/bcf.c | 6 ++++-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/bam2bcf.c b/bam2bcf.c index e55212c..88222e7 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -154,8 +154,9 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, return 0; } -int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b) +int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr) { + extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); kstring_t s; int i; b->n_smpl = bc->n; @@ -178,9 +179,19 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b) } kputc('\0', &s); // FMT - kputs("PL", &s); kputc('\0', &s); + kputs("PL", &s); + if (bcr) { + kputs(":DP", &s); + } + kputc('\0', &s); b->m_str = s.m; b->str = s.s; b->l_str = s.l; bcf_sync(b); memcpy(b->gi[0].data, bc->PL, b->gi[0].len * bc->n); + if (bcr) { + uint16_t *dp = (uint16_t*)b->gi[1].data; + for (i = 0; i < bc->n; ++i) { + dp[i] = bcr[i].depth < 0xffff? bcr[i].depth : 0xffff; + } + } return 0; } diff --git a/bam2bcf.h b/bam2bcf.h index 0dddb92..f96a7e3 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -28,7 +28,7 @@ extern "C" { void bcf_call_destroy(bcf_callaux_t *bca); int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf_callaux_t *bca, bcf_callret1_t *r); int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call); - int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b); + int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr); #ifdef __cplusplus } diff --git a/bam_plcmd.c b/bam_plcmd.c index dca8518..75f5691 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -528,6 +528,7 @@ int bam_pileup(int argc, char *argv[]) #define MPLP_NO_COMP 0x20 #define MPLP_NO_ORPHAN 0x40 #define MPLP_REALN 0x80 +#define MPLP_SAMINFO 0x100 typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres; @@ -714,7 +715,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i); bcf_call_combine(gplp.n, bcr, ref16, &bc); - bcf_call2bcf(tid, pos, &bc, b); + bcf_call2bcf(tid, pos, &bc, b, (conf->flag&MPLP_SAMINFO)? bcr : 0); bcf_write(bp, bh, b); bcf_destroy(b); } else { @@ -770,7 +771,7 @@ int bam_mpileup(int argc, char *argv[]) mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; - while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:B")) >= 0) { + while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BI")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); @@ -784,6 +785,7 @@ int bam_mpileup(int argc, char *argv[]) case 'B': mplp.flag &= ~MPLP_REALN & ~MPLP_NO_ORPHAN; break; case 'O': mplp.flag |= MPLP_NO_ORPHAN; break; case 'R': mplp.flag |= MPLP_REALN; break; + case 'I': mplp.flag |= MPLP_SAMINFO; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; @@ -802,6 +804,7 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); fprintf(stderr, " -B disable BAQ computation\n"); + fprintf(stderr, " -I additional individual information (DP and ST)\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; diff --git a/bcftools/bcf.c b/bcftools/bcf.c index 9044b93..035eddc 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -134,7 +134,9 @@ int bcf_sync(bcf1_t *b) b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2; } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2)) { b->gi[i].len = 2; - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2) + || b->gi[i].fmt == bcf_str2int("ST", 2)) + { b->gi[i].len = 1; } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2 * 4; @@ -236,7 +238,7 @@ void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s) } } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { kputw(((uint16_t*)b->gi[i].data)[j], s); - } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { + } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("ST", 2)) { kputw(((uint8_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("GT", 2)) { int y = ((uint8_t*)b->gi[i].data)[j]; -- 2.39.2