From: Heng Li Date: Fri, 23 Jul 2010 01:39:13 +0000 (+0000) Subject: * samtools-0.1.8-3 (r627) X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=d763698cc12d7daa7361d610db3c9c9e83e7655b * samtools-0.1.8-3 (r627) * multi-sample snp calling appears to work. More tests needed. --- diff --git a/bam_mcns.c b/bam_mcns.c index cd64b0a..dee2016 100644 --- a/bam_mcns.c +++ b/bam_mcns.c @@ -12,7 +12,7 @@ struct __mc_aux_t { double theta; double *q2p, *pdg; // pdg -> P(D|g) double *alpha, *beta; - int *qsum, *bcnt, rcnt[4]; + int *qsum, *bcnt; }; void mc_init_prior(mc_aux_t *ma, double theta) @@ -65,11 +65,9 @@ static void sum_err(int *n, const bam_pileup1_t **plp, mc_aux_t *ma) int i, j; memset(ma->qsum, 0, sizeof(int) * 4 * ma->n); memset(ma->bcnt, 0, sizeof(int) * 4 * ma->n); - memset(ma->rcnt, 0, sizeof(int) * 4); for (j = 0; j < ma->n; ++j) { int tmp, *qsum = ma->qsum + j * 4; int *bcnt = ma->bcnt + j * 4; - double r = drand48(), rr; for (i = tmp = 0; i < n[j]; ++i) { const bam_pileup1_t *p = plp[j] + i; int q, b; @@ -83,13 +81,6 @@ static void sum_err(int *n, const bam_pileup1_t **plp, mc_aux_t *ma) ++bcnt[b]; ++tmp; } - if (tmp) { - for (i = 0, rr = 0.; i < 4; ++i) { - rr += (double)bcnt[i] / tmp; - if (rr >= r) break; - } - if (i < 4) ++ma->rcnt[i]; - } } } @@ -127,21 +118,21 @@ static void cal_pdg(mc_aux_t *ma) // return the reference allele frequency double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ref, int *alt) { - int i, acnt[4], j; - double fr = -1., f0 = -1.; + int i, cnt; + double f; sum_err(n, plp, ma); set_allele(ref, ma); cal_pdg(ma); - acnt[0] = acnt[1] = acnt[2] = acnt[3] = 0; - for (i = 0; i < ma->n; ++i) - for (j = 0; j < 4; ++j) - acnt[j] += ma->bcnt[i * 4 + j]; - if (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]) // never used... - fr = (double)ma->rcnt[ref] / (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]); - if (acnt[ma->ref] + acnt[ma->alt]) - f0 = (double)acnt[ref] / (acnt[ma->ref] + acnt[ma->alt]); *_ref = ma->ref; *alt = ma->alt; - return ma->n == 1 || fr < 0.? f0 : fr; + for (i = cnt = 0, f = 0.; i < ma->n; ++i) { + int *bcnt = ma->bcnt + i * 4; + int x = bcnt[ma->ref] + bcnt[ma->alt]; + if (x) { + ++cnt; + f += (double)bcnt[ma->ref] / x; + } + } + return f / cnt; } // f0 is the reference allele frequency double mc_freq_iter(double f0, mc_aux_t *ma) diff --git a/bam_plcmd.c b/bam_plcmd.c index 2854770..2afebea 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -557,11 +557,13 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) f = f0 = mc_freq0(_ref0, n_plp, plp, ma, &_ref, &_alt); if (f >= 0.0) { double q, flast = f; - for (j = 0; j < 10; ++j){ + for (j = 0; j < 10; ++j) { f = mc_freq_iter(flast, ma); +// printf("%lg->%lg\n", flast, f); if (fabs(f - flast) < 1e-3) break; flast = f; } + if (1. - f < 1e-4) f = 1.; pref = mc_ref_prob(ma); is_var = (pref < .5); q = is_var? pref : 1. - pref; @@ -655,7 +657,17 @@ int bam_mpileup(int argc, char *argv[]) } } if (argc == 1) { - fprintf(stderr, "Usage: samtools mpileup [-r reg] [-f in.fa] [-l pos] in1.bam [in2.bam [...]]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n"); + fprintf(stderr, "Options: -f FILE reference sequence file [null]\n"); + fprintf(stderr, " -r STR region in which pileup is generated [null]\n"); + fprintf(stderr, " -l FILE list of positions (format: chr pos) [null]\n"); + fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq); + fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq); + fprintf(stderr, " -V generate VCF output (SNP calling)\n"); + fprintf(stderr, " -v show variant sites only\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Notes: Assuming error independency and diploid individuals.\n\n"); return 1; } mpileup(&mplp, argc - optind, argv + optind); diff --git a/bamtk.c b/bamtk.c index b5f6b67..b51df96 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.8-2 (r617)" +#define PACKAGE_VERSION "0.1.8-3 (r627)" #endif int bam_taf2baf(int argc, char *argv[]);