From fb9ec864a4db78d58c5aba13c6c24ce4b610ab57 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sun, 25 Jul 2010 16:01:26 +0000 Subject: [PATCH] * samtools-0.1.8-6 (r638) * skip isnan/isinf in case of float underflow * added the flat prior * fixed an issue where there are no reads supporting the reference --- bam_mcns.c | 18 +++++++++++++----- bam_mcns.h | 3 ++- bam_plcmd.c | 18 +++++++++++++----- bamtk.c | 2 +- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/bam_mcns.c b/bam_mcns.c index d337e5a..7ef9f0f 100644 --- a/bam_mcns.c +++ b/bam_mcns.c @@ -12,7 +12,7 @@ struct __mc_aux_t { int n, M; - int ref, alt; + int ref, alt, alt2; double *q2p, *pdg; // pdg -> P(D|g) double *alpha, *beta; double *z, *zswap; // aux for afs @@ -26,6 +26,9 @@ void mc_init_prior(mc_aux_t *ma, int type, double theta) if (type == MC_PTYPE_COND2) { for (i = 0; i <= 2 * ma->n; ++i) ma->alpha[i] = 2. * (i + 1) / (2 * ma->n + 1) / (2 * ma->n + 2); + } else if (type == MC_PTYPE_FLAT) { + for (i = 0; i <= ma->M; ++i) + ma->alpha[i] = 1. / (ma->M + 1); } else { double sum; for (i = 0, sum = 0.; i < 2 * ma->n; ++i) @@ -111,9 +114,11 @@ static void set_allele(int ref, mc_aux_t *ma) for (i = 1; i < 4; ++i) // insertion sort for (j = i; j > 0 && sum[j] < sum[j-1]; --j) tmp = sum[j], sum[j] = sum[j-1], sum[j-1] = tmp; - ma->ref = sum[3]&3; ma->alt = sum[2]&3; - if (ref == ma->alt) tmp = ma->ref, ma->ref = ma->alt, ma->alt = tmp; - // note that ma->ref might not be ref in case of triallele + ma->ref = sum[3]&3; ma->alt = sum[2]&3; ma->alt2 = -1; + if (ma->ref != ref) { + if (ma->alt == ref) tmp = ma->ref, ma->ref = ma->alt, ma->alt = tmp; + else ma->alt2 = ma->alt, ma->alt = ma->ref, ma->ref = ref; + } } static void cal_pdg(mc_aux_t *ma) @@ -248,11 +253,14 @@ static void mc_add_afs(mc_aux_t *ma, double PD, double *f_map, double *p_map) int k, l; double sum = 0.; memset(ma->afs1, 0, sizeof(double) * (2 * ma->n + 1)); + *f_map = *p_map = -1.; for (k = 0; k <= 2 * ma->n; ++k) { mc_cal_z(ma, k); for (l = 0; l <= 2 * ma->n; ++l) ma->afs1[l] += ma->alpha[k] * ma->z[l] / PD; } + for (k = 0; k <= ma->M; ++k) + if (isnan(ma->afs1[k]) || isinf(ma->afs1[k])) return; for (k = 0; k <= 2 * ma->n; ++k) { ma->afs[k] += ma->afs1[k]; sum += ma->afs1[k]; @@ -280,7 +288,7 @@ int mc_cal(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, mc_rst_t *r set_allele(ref, ma); cal_pdg(ma); // set ref/major allele - rst->ref = ma->ref; rst->alt = ma->alt; + rst->ref = ma->ref; rst->alt = ma->alt; rst->alt2 = ma->alt2; // calculate naive and Nielsen's freq rst->f_naive = mc_freq0(ma, &rst->f_nielsen); { // calculate f_em diff --git a/bam_mcns.h b/bam_mcns.h index 5c7c420..0781975 100644 --- a/bam_mcns.h +++ b/bam_mcns.h @@ -8,7 +8,7 @@ typedef struct __mc_aux_t mc_aux_t; typedef struct { // O(n) - int ref, alt; + int ref, alt, alt2; double f_em, f_naive, f_nielsen; // O(n^2) double PD, p_ref, f_exp; @@ -18,6 +18,7 @@ typedef struct { #define MC_PTYPE_FULL 1 #define MC_PTYPE_COND2 2 +#define MC_PTYPE_FLAT 3 #ifdef __cplusplus extern "C" { diff --git a/bam_plcmd.c b/bam_plcmd.c index 195a596..a1c87ab 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -584,8 +584,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) ++N; // number of processed lines printf("%s\t%d\t.\t%c\t", h->target_name[tid], pos + 1, _ref0b); if (is_var) { - if (_ref0 == r.ref) putchar("ACGTN"[r.alt]); - else printf("%c,%c", "ACGTN"[r.ref], "ACGTN"[r.alt]); + putchar("ACGTN"[r.alt]); + if (r.alt2 >= 0 && r.alt2 < 4) printf(",%c", "ACGT"[r.alt2]); } else putchar('.'); printf("\t%d\t", qref); if (!tot) printf("Q13\t"); @@ -665,10 +665,18 @@ int bam_mpileup(int argc, char *argv[]) mplp.max_mq = 60; mplp.prior_type = MC_PTYPE_FULL; mplp.theta = 1e-3; - while ((c = getopt(argc, argv, "vVcF2Sf:r:l:VM:q:t:")) >= 0) { + while ((c = getopt(argc, argv, "vVcFSP:f:r:l:VM:q:t:")) >= 0) { switch (c) { case 't': mplp.theta = atof(optarg); break; - case '2': mplp.prior_type = MC_PTYPE_COND2; break; + case 'P': + if (strcmp(optarg, "full") == 0) mplp.prior_type = MC_PTYPE_FULL; + else if (strcmp(optarg, "cond2") == 0) mplp.prior_type = MC_PTYPE_COND2; + else if (strcmp(optarg, "flat") == 0) mplp.prior_type = MC_PTYPE_FLAT; + else { + fprintf(stderr, "[%s] unrecognized prior type.\n", __func__); + return 1; + } + break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; @@ -693,10 +701,10 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq); fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -t FLOAT scaled mutation rate [%lg]\n", mplp.theta); + fprintf(stderr, " -P STR prior: full, flat, cond2 [full]"); fprintf(stderr, " -c generate VCF output (consensus calling)\n"); fprintf(stderr, " -v show variant sites only\n"); fprintf(stderr, " -S calculate AFS (slow, to stderr)\n"); - fprintf(stderr, " -2 conditional prior\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming error independency and diploid individuals.\n\n"); return 1; diff --git a/bamtk.c b/bamtk.c index ae972d5..e80d042 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.8-5 (r636)" +#define PACKAGE_VERSION "0.1.8-6 (r638)" #endif int bam_taf2baf(int argc, char *argv[]); -- 2.39.2