From b6f550edaa9384857879894bf01399fad76dac37 Mon Sep 17 00:00:00 2001
From: Heng Li <lh3@live.co.uk>
Date: Wed, 18 Aug 2010 13:55:20 +0000
Subject: [PATCH] allow to read the prior from the error output. EM iteration
 is working.

---
 bcftools/prob1.c  | 41 +++++++++++++++++++++++++++++++++++++++++
 bcftools/prob1.h  |  1 +
 bcftools/vcfout.c | 17 +++++++++++------
 3 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/bcftools/prob1.c b/bcftools/prob1.c
index 4afda56..4611af1 100644
--- a/bcftools/prob1.c
+++ b/bcftools/prob1.c
@@ -2,8 +2,12 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <errno.h>
 #include "prob1.h"
 
+#include "kseq.h"
+KSTREAM_INIT(gzFile, gzread, 16384)
+
 #define MC_AVG_ERR 0.007
 #define MC_MAX_EM_ITER 16
 #define MC_EM_EPS 1e-4
@@ -54,6 +58,43 @@ void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta)
 	}
 }
 
+int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn)
+{
+	gzFile fp;
+	kstring_t s;
+	kstream_t *ks;
+	long double sum;
+	int dret, k;
+	memset(&s, 0, sizeof(kstring_t));
+	fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+	ks = ks_init(fp);
+	memset(ma->phi, 0, sizeof(double) * (ma->M + 1));
+	while (ks_getuntil(ks, '\n', &s, &dret) >= 0) {
+		if (strstr(s.s, "[afs] ") == s.s) {
+			char *p = s.s + 6;
+			for (k = 0; k <= ma->M; ++k) {
+				int x;
+				double y;
+				x = strtol(p, &p, 10);
+				if (x != k && (errno == EINVAL || errno == ERANGE)) return -1;
+				++p;
+				y = strtod(p, &p);
+				if (y == 0. && (errno == EINVAL || errno == ERANGE)) return -1;
+				ma->phi[ma->M - k] += y;
+			}
+		}
+	}
+	ks_destroy(ks);
+	gzclose(fp);
+	free(s.s);
+	for (sum = 0., k = 0; k <= ma->M; ++k) sum += ma->phi[k];
+	fprintf(stderr, "[prior]");
+	for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum;
+	for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]);
+	fputc('\n', stderr);
+	return 0;
+}
+
 bcf_p1aux_t *bcf_p1_init(int n) // FIXME: assuming diploid
 {
 	bcf_p1aux_t *ma;
diff --git a/bcftools/prob1.h b/bcftools/prob1.h
index 96beb04..358f551 100644
--- a/bcftools/prob1.h
+++ b/bcftools/prob1.h
@@ -25,6 +25,7 @@ extern "C" {
 	int bcf_p1_cal(bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst);
 	int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k);
 	void bcf_p1_dump_afs(bcf_p1aux_t *ma);
+	int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn);
 
 #ifdef __cplusplus
 }
diff --git a/bcftools/vcfout.c b/bcftools/vcfout.c
index fd817f9..f54fde5 100644
--- a/bcftools/vcfout.c
+++ b/bcftools/vcfout.c
@@ -23,7 +23,7 @@ KSTREAM_INIT(gzFile, gzread, 16384)
 
 typedef struct {
 	int flag, prior_type;
-	char *fn_list;
+	char *fn_list, *prior_file;
 	double theta, pref;
 } viewconf_t;
 
@@ -124,10 +124,8 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p
 	}
 	kputc('\0', &s); kputc('\0', &s);
 	kputs(b->info, &s);
-	if (p_dp >= 0. && d[2] + d[3] > 0) { // has at least one non-reference base
-		if (b->info[0]) kputc(';', &s);
-		ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp);
-	}
+	if (b->info[0]) kputc(';', &s);
+	ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp);
 	if (p_hwe <= .2) ksprintf(&s, ";GC=%.2lf,%.2lf,%.2lf;HWE=%.3lf", pr->g[2], pr->g[1], pr->g[0], p_hwe);
 	if (p_dp >= 0. && p_dp <= .2) ksprintf(&s, ";TDP=%.3lf", p_dp);
 	if (p_ed >= 0. && p_ed <= .2) ksprintf(&s, ";TED=%.3lf", p_ed);
@@ -173,6 +171,7 @@ int bcfview(int argc, char *argv[])
 			if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL;
 			else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2;
 			else if (strcmp(optarg, "flat") == 0) vc.prior_type = MC_PTYPE_FLAT;
+			else vc.prior_file = strdup(optarg);
 			break;
 		}
 	}
@@ -206,7 +205,12 @@ int bcfview(int argc, char *argv[])
 	vcf_hdr_write(bout, h);
 	if (vc.flag & VC_CALL) {
 		p1 = bcf_p1_init(h->n_smpl);
-		bcf_p1_init_prior(p1, vc.prior_type, vc.theta);
+		if (vc.prior_file) {
+			if (bcf_p1_read_prior(p1, vc.prior_file) < 0) {
+				fprintf(stderr, "[%s] fail to read the prior AFS.\n", __func__);
+				return 1;
+			}
+		} else bcf_p1_init_prior(p1, vc.prior_type, vc.theta);
 	}
 	if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h);
 	if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) {
@@ -248,6 +252,7 @@ int bcfview(int argc, char *argv[])
 		vcf_write(bout, h, b);
 		++n_processed;
 	}
+	if (vc.prior_file) free(vc.prior_file);
 	if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);
 	bcf_hdr_destroy(h);
 	bcf_destroy(b);
-- 
2.39.5