From d4c6e3a92315fc9fb1e1ba3c85112c69ae8299e4 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 8 Sep 2010 16:53:55 +0000 Subject: [PATCH] * samtools-0.1.8-13 (r715) * fixed a bug in identifying SM across files * bcftools: estimate heterozygosity * bcftools: allow to skip sites without reference bases --- bamtk.c | 2 +- bcftools/call1.c | 11 ++++++++++- bcftools/prob1.c | 6 ++++-- sample.c | 6 +++--- sample.h | 2 +- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/bamtk.c b/bamtk.c index ce27209..ce4ebd5 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.8-13 (r698)" +#define PACKAGE_VERSION "0.1.8-13 (r715)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bcftools/call1.c b/bcftools/call1.c index 9589e89..c0c24a6 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -22,6 +22,7 @@ KSTREAM_INIT(gzFile, gzread, 16384) #define VC_UNCOMP 64 #define VC_HWE 128 #define VC_KEEPALT 256 +#define VC_ACGT_ONLY 512 typedef struct { int flag, prior_type, n1; @@ -195,10 +196,11 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; - while ((c = getopt(argc, argv, "1:l:cHAGvLbSuP:t:p:")) >= 0) { + while ((c = getopt(argc, argv, "N1:l:cHAGvLbSuP:t:p:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; case 'l': vc.fn_list = strdup(optarg); break; + case 'N': vc.flag |= VC_ACGT_ONLY; break; case 'G': vc.flag |= VC_NO_GENO; break; case 'L': vc.flag |= VC_NO_PL; break; case 'A': vc.flag |= VC_KEEPALT; break; @@ -230,6 +232,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -L discard the PL genotype field\n"); fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n"); fprintf(stderr, " -v output potential variant sites only\n"); + fprintf(stderr, " -N skip sites where REF is not A/C/G/T\n"); fprintf(stderr, " -1 INT number of group-1 samples [0]\n"); fprintf(stderr, " -l FILE list of sites to output [all sites]\n"); fprintf(stderr, " -t FLOAT scaled mutation rate [%.4lg]\n", vc.theta); @@ -277,6 +280,12 @@ int bcfview(int argc, char *argv[]) } } while (vcf_read(bp, h, b) > 0) { + if (vc.flag & VC_ACGT_ONLY) { + int x; + if (b->ref[0] == 0 || b->ref[1] != 0) continue; + x = toupper(b->ref[0]); + if (x != 'A' && x != 'C' && x != 'G' && x != 'T') continue; + } if (hash) { uint64_t x = (uint64_t)b->tid<<32 | b->pos; khint_t k = kh_get(set64, hash, x); diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 85c2945..b27bb8e 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -106,8 +106,10 @@ int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn) for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum; for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]); fputc('\n', stderr); - for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k]; - fprintf(stderr, "[heterozygosity] %lf\n", (double)sum / ma->M); + for (sum = 0., k = 1; k < ma->M; ++k) sum += ma->phi[ma->M - k] * (2.* k * (ma->M - k) / ma->M / (ma->M - 1)); + fprintf(stderr, "[%s] heterozygosity=%lf, ", __func__, (double)sum); + for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k] / ma->M; + fprintf(stderr, "theta=%lf\n", (double)sum); return 0; } diff --git a/sample.c b/sample.c index 95bec68..b3d2642 100644 --- a/sample.c +++ b/sample.c @@ -9,6 +9,7 @@ bam_sample_t *bam_smpl_init(void) bam_sample_t *s; s = calloc(1, sizeof(bam_sample_t)); s->rg2smid = kh_init(sm); + s->sm2id = kh_init(sm); return s; } @@ -23,6 +24,7 @@ void bam_smpl_destroy(bam_sample_t *sm) for (k = kh_begin(rg2smid); k != kh_end(rg2smid); ++k) if (kh_exist(rg2smid, k)) free((char*)kh_key(rg2smid, k)); kh_destroy(sm, sm->rg2smid); + kh_destroy(sm, sm->sm2id); free(sm); } @@ -52,8 +54,7 @@ int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) const char *p = txt, *q, *r; kstring_t buf; int n = 0; - khash_t(sm) *sm2id; - sm2id = kh_init(sm); + khash_t(sm) *sm2id = (khash_t(sm)*)sm->sm2id; memset(&buf, 0, sizeof(kstring_t)); while ((q = strstr(p, "@RG")) != 0) { p = q + 3; @@ -75,7 +76,6 @@ int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt) } if (n == 0) add_pair(sm, sm2id, fn, fn); free(buf.s); - kh_destroy(sm, sm2id); return 0; } diff --git a/sample.h b/sample.h index 149f380..85fe499 100644 --- a/sample.h +++ b/sample.h @@ -6,7 +6,7 @@ typedef struct { int n, m; char **smpl; - void *rg2smid; + void *rg2smid, *sm2id; } bam_sample_t; bam_sample_t *bam_smpl_init(void); -- 2.39.2