#define VC_ACGT_ONLY 512
#define VC_QCALL 1024
#define VC_CALL_GT 2048
+#define VC_ADJLD 4096 // set by -L: annotate each site with LD against the previously kept site
+#define VC_NO_INDEL 8192 // set by -I: records for which bcf_is_indel() is true are skipped
typedef struct {
int flag, prior_type, n1;
char *fn_list, *prior_file;
- double theta, pref;
+ double theta, pref, indel_frac; // indel_frac: -i value (help text: indel-to-substitution ratio); starts at -1 and is only used when > 0
} viewconf_t;
khash_t(set64) *bcf_load_pos(const char *fn, bcf_hdr_t *_h)
kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s);
kputs(b->info, &s);
if (b->info[0]) kputc(';', &s);
- ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp);
+// ksprintf(&s, "AF1=%.4lg;AFE=%.4lg;CI95=%.4lg,%.4lg", 1.-pr->f_em, 1.-pr->f_exp, pr->cil, pr->cih);
+ ksprintf(&s, "AF1=%.4lg;CI95=%.4lg,%.4lg", 1.-pr->f_em, pr->cil, pr->cih);
ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
if (a.is_tested) {
if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%lg,%lg,%lg,%lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
return is_var;
}
+double bcf_ld_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); // defined elsewhere; bcfview treats a return >= 0 as a usable r2 and then reads f[0], f[1], f[2]
+
int bcfview(int argc, char *argv[])
{
extern int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b);
+ extern void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x); // invoked below with vc.indel_frac when -i is positive
bcf_t *bp, *bout = 0;
- bcf1_t *b;
+ bcf1_t *b, *blast; // blast: record handed to bcf_ld_freq() as the "previous" site for -L
int c;
uint64_t n_processed = 0;
viewconf_t vc;
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
- vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5;
- while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:Qg")) >= 0) {
+ vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; // indel_frac stays -1 unless -i is given; non-positive values skip bcf_p1_indel_prior()
+ while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:I")) >= 0) { // option string gains -L, -i <arg> and -I
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.fn_list = strdup(optarg); break;
case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break;
case 'H': vc.flag |= VC_HWE; break;
case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break;
+ case 'I': vc.flag |= VC_NO_INDEL; break; // skip indel records in the main loop
case 't': vc.theta = atof(optarg); break;
case 'p': vc.pref = atof(optarg); break;
+ case 'i': vc.indel_frac = atof(optarg); break; // NOTE(review): atof yields 0 on malformed input, which silently skips the indel prior call
case 'Q': vc.flag |= VC_QCALL; break;
+ case 'L': vc.flag |= VC_ADJLD; break; // enable the adjacent-site LD annotation
case 'P':
if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL;
else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2;
fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n");
fprintf(stderr, " -N skip sites where REF is not A/C/G/T\n");
fprintf(stderr, " -Q output the QCALL likelihood format\n");
+ fprintf(stderr, " -L calculate LD for adjacent sites\n"); // help entry for VC_ADJLD
+ fprintf(stderr, " -I skip indels\n"); // help entry for VC_NO_INDEL
fprintf(stderr, " -1 INT number of group-1 samples [0]\n");
fprintf(stderr, " -l FILE list of sites to output [all sites]\n");
- fprintf(stderr, " -t FLOAT scaled mutation rate [%.4lg]\n", vc.theta);
+ fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4lg]\n", vc.theta); // wording narrowed to substitutions now that -i exists
+ fprintf(stderr, " -i FLOAT indel-to-substitution ratio [%.4lg]\n", vc.indel_frac); // prints vc.indel_frac, which is initialised to -1 above
fprintf(stderr, " -p FLOAT variant if P(ref|D)<FLOAT [%.3lg]\n", vc.pref);
fprintf(stderr, " -P STR type of prior: full, cond2, flat [full]\n");
fprintf(stderr, "\n");
}
b = calloc(1, sizeof(bcf1_t));
+ blast = calloc(1, sizeof(bcf1_t)); // zero-filled placeholder until the first bcf_cpy(); NOTE(review): allocation result is not checked
strcpy(moder, "r");
if (!(vc.flag & VC_VCFIN)) strcat(moder, "b");
strcpy(modew, "w");
bcf_p1_set_n1(p1, vc.n1);
bcf_p1_init_subprior(p1, vc.prior_type, vc.theta);
}
+ if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac); // only a positive -i value reaches bcf_p1_indel_prior()
}
if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h);
if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) {
}
}
while (vcf_read(bp, h, b) > 0) {
- if (vc.flag & VC_ACGT_ONLY) {
+ int is_indel = bcf_is_indel(b); // evaluated once per record and reused by both filters below
+ if ((vc.flag & VC_NO_INDEL) && is_indel) continue; // -I: drop indel records before any further processing
+ if ((vc.flag & VC_ACGT_ONLY) && !is_indel) { // the single-character REF check is bypassed for indel records
int x;
if (b->ref[0] == 0 || b->ref[1] != 0) continue;
x = toupper(b->ref[0]);
bcf_2qcall(h, b);
continue;
}
+ if (vc.flag & (VC_CALL|VC_ADJLD)) bcf_gl2pl(b); // moved out of the VC_CALL branch so it also runs when only -L is requested
if (vc.flag & VC_CALL) { // call variants
bcf_p1rst_t pr;
- bcf_gl2pl(b);
bcf_p1_cal(b, p1, &pr); // pr.g[3] is not calculated here
if (vc.flag&VC_HWE) bcf_p1_cal_g3(p1, pr.g);
if (n_processed % 100000 == 0) {
if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue;
update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag);
}
+ if (vc.flag & VC_ADJLD) { // compute LD
+ double f[4], r2;
+ if ((r2 = bcf_ld_freq(blast, b, f)) >= 0) { // a negative return is treated as "no estimate" and nothing is appended
+ kstring_t s;
+ s.m = s.l = 0; s.s = 0; // start from an empty kstring
+ if (*b->info) kputc(';', &s); // separator only when INFO already has content
+ ksprintf(&s, "NEIR=%.3lf;NEIF=%.3lf,%.3lf", r2, f[0]+f[2], f[0]+f[1]); // NEIR = r2; NEIF = two sums over f[] (presumably per-site frequencies -- TODO confirm against bcf_ld_freq)
+ bcf_append_info(b, s.s, s.l);
+ free(s.s); // the temporary buffer is released right after bcf_append_info()
+ }
+ bcf_cpy(blast, b); // runs even when no LD value was produced; records skipped via continue above never reach this point
+ }
if (vc.flag & VC_NO_GENO) { // do not output GENO fields
b->n_gi = 0;
b->fmt[0] = '\0';
if (vc.prior_file) free(vc.prior_file);
if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);
bcf_hdr_destroy(h);
- bcf_destroy(b);
+ bcf_destroy(b); bcf_destroy(blast); // blast is released alongside b
vcf_close(bp); vcf_close(bout);
if (hash) kh_destroy(set64, hash);
if (vc.fn_list) free(vc.fn_list);
if (p1) bcf_p1_destroy(p1);
return 0;
}
-