From 7943d9b7b1c159da840cbcfeb9cb277bb58474f2 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 20 Nov 2010 04:16:08 +0000 Subject: [PATCH] * samtools-0.1.10-9 (r844) * added the "folded" or reference-free mode for variant calling --- bamtk.c | 2 +- bcftools/call1.c | 8 ++++++-- bcftools/prob1.c | 21 +++++++++++++++++++-- bcftools/prob1.h | 1 + 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/bamtk.c b/bamtk.c index 6d4eb76..cb17906 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.10-8 (r841)" +#define PACKAGE_VERSION "0.1.10-9 (r844)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bcftools/call1.c b/bcftools/call1.c index b0b14fc..5e4fc9f 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -26,6 +26,7 @@ KSTREAM_INIT(gzFile, gzread, 16384) #define VC_CALL_GT 2048 #define VC_ADJLD 4096 #define VC_NO_INDEL 8192 +#define VC_FOLDED 16384 typedef struct { int flag, prior_type, n1; @@ -216,8 +217,9 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; - while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:I")) >= 0) { + while ((c = getopt(argc, argv, "fN1:l:cHAGvbSuP:t:p:QgLi:I")) >= 0) { switch (c) { + case 'f': vc.flag |= VC_FOLDED; break; case '1': vc.n1 = atoi(optarg); break; case 'l': vc.fn_list = strdup(optarg); break; case 'N': vc.flag |= VC_ACGT_ONLY; break; @@ -260,6 +262,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -Q output the QCALL likelihood format\n"); fprintf(stderr, " -L calculate LD for adjacent sites\n"); fprintf(stderr, " -I skip indels\n"); + fprintf(stderr, " -f reference-free variant calling\n"); fprintf(stderr, " -1 INT number of group-1 samples [0]\n"); fprintf(stderr, " -l FILE list of sites to output [all sites]\n"); fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4lg]\n", vc.theta); @@ -293,7 +296,8 @@ int bcfview(int argc, char *argv[]) bcf_p1_set_n1(p1, vc.n1); bcf_p1_init_subprior(p1, vc.prior_type, vc.theta); } - if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac); + if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac); // otherwise use the default indel_frac + if (vc.flag & VC_FOLDED) bcf_p1_set_folded(p1); } if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h); if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) { diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 176a0fc..8bf968f 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -32,7 +32,7 @@ unsigned char seq_nt4_table[256] = { }; struct __bcf_p1aux_t { - int n, M, n1, is_indel; + int n, M, n1, is_indel, is_folded; double *q2p, *pdg; // pdg -> P(D|g) double *phi, *phi_indel; double *z, *zswap; // aux for afs @@ -43,6 +43,13 @@ struct __bcf_p1aux_t { int PL_len; }; +static void fold_array(int M, double *x) +{ + int k; + for (k = 0; k < M/2; ++k) + x[k] = x[M-k] = (x[k] + x[M-k]) / 2.; +} + void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x) { int i; @@ -155,6 +162,15 @@ int bcf_p1_set_n1(bcf_p1aux_t *b, int n1) return 0; } +void bcf_p1_set_folded(bcf_p1aux_t *p1a) +{ + if (p1a->n1 < 0) { + p1a->is_folded = 1; + fold_array(p1a->M, p1a->phi); + fold_array(p1a->M, p1a->phi_indel); + } +} + void bcf_p1_destroy(bcf_p1aux_t *ma) { if (ma) { @@ -373,7 +389,7 @@ int bcf_p1_cal(bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst) // rst->rank0 = cal_pdg(b, ma); rst->f_exp = mc_cal_afs(ma); - rst->p_ref = ma->afs1[ma->M]; + rst->p_ref = ma->is_folded? ma->afs1[ma->M] + ma->afs1[0] : ma->afs1[ma->M]; // calculate f_flat and f_em for (k = 0, sum = 0.; k <= ma->M; ++k) sum += (long double)ma->z[k]; @@ -412,6 +428,7 @@ int bcf_p1_cal(bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst) void bcf_p1_dump_afs(bcf_p1aux_t *ma) { int k; + if (ma->is_folded) fold_array(ma->M, ma->afs); fprintf(stderr, "[afs]"); for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lf", k, ma->afs[ma->M - k]); diff --git a/bcftools/prob1.h b/bcftools/prob1.h index 6d9d28e..3827534 100644 --- a/bcftools/prob1.h +++ b/bcftools/prob1.h @@ -32,6 +32,7 @@ extern "C" { int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn); long double bcf_p1_cal_g3(bcf_p1aux_t *p1a, double g[3]); int bcf_p1_set_n1(bcf_p1aux_t *b, int n1); + void bcf_p1_set_folded(bcf_p1aux_t *p1a); // only effective when set_n1() is not called #ifdef __cplusplus } -- 2.39.2