From: Heng Li Date: Tue, 31 Aug 2010 14:20:57 +0000 (+0000) Subject: * rename vcfout.c as call1.c X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=082c52f3cb5517db50987bf1dc43aef845c45fd8 * rename vcfout.c as call1.c * prepare to add two-sample comparison --- diff --git a/bcftools/Makefile b/bcftools/Makefile index 6a6bdb8..5a3dc76 100644 --- a/bcftools/Makefile +++ b/bcftools/Makefile @@ -3,7 +3,7 @@ CFLAGS= -g -Wall -O2 #-m64 #-arch ppc DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE LOBJS= bcf.o vcf.o bcfutils.o prob1.o kfunc.o index.o fet.o OMISC= .. -AOBJS= vcfout.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o +AOBJS= call1.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o PROG= bcftools INCLUDES= SUBDIRS= . diff --git a/bcftools/call1.c b/bcftools/call1.c new file mode 100644 index 0000000..75483e9 --- /dev/null +++ b/bcftools/call1.c @@ -0,0 +1,299 @@ +#include +#include +#include +#include +#include +#include "bcf.h" +#include "prob1.h" +#include "kstring.h" + +#include "khash.h" +KHASH_SET_INIT_INT64(set64) + +#include "kseq.h" +KSTREAM_INIT(gzFile, gzread, 16384) + +#define VC_NO_PL 1 +#define VC_NO_GENO 2 +#define VC_BCFOUT 4 +#define VC_CALL 8 +#define VC_VARONLY 16 +#define VC_VCFIN 32 +#define VC_UNCOMP 64 + +typedef struct { + int flag, prior_type; + char *fn_list, *prior_file; + double theta, pref; +} viewconf_t; + +khash_t(set64) *bcf_load_pos(const char *fn, bcf_hdr_t *_h) +{ + void *str2id; + gzFile fp; + kstream_t *ks; + int ret, dret, lineno = 1; + kstring_t *str; + khash_t(set64) *hash = 0; + + hash = kh_init(set64); + str2id = bcf_build_refhash(_h); + str = calloc(1, sizeof(kstring_t)); + fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); + ks = ks_init(fp); + while (ks_getuntil(ks, 0, str, &dret) >= 0) { + int tid = bcf_str2id(str2id, str->s); + if (tid >= 0 && dret != '\n') { + if (ks_getuntil(ks, 0, str, &dret) >= 0) { + uint64_t x = (uint64_t)tid<<32 | (atoi(str->s) - 1); + kh_put(set64, hash, x, &ret); + } else break; + } else fprintf(stderr, "[%s] %s is not a reference name (line %d).\n", __func__, str->s, lineno); + if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); + if (dret < 0) break; + ++lineno; + } + bcf_str2id_destroy(str2id); + ks_destroy(ks); + gzclose(fp); + free(str->s); free(str); + return hash; +} + +static double test_hwe(const double g[3]) +{ + extern double kf_gammaq(double p, double x); + double fexp, chi2, f[3], n; + int i; + n = g[0] + g[1] + g[2]; + fexp = (2. * g[2] + g[1]) / (2. * n); + if (fexp > 1. - 1e-10) fexp = 1. - 1e-10; + if (fexp < 1e-10) fexp = 1e-10; + f[0] = n * (1. - fexp) * (1. - fexp); + f[1] = n * 2. * fexp * (1. - fexp); + f[2] = n * fexp * fexp; + for (i = 0, chi2 = 0.; i < 3; ++i) + chi2 += (g[i] - f[i]) * (g[i] - f[i]) / f[i]; + return kf_gammaq(.5, chi2 / 2.); +} + +typedef struct { + double p[4]; + int mq, depth, is_tested, d[4]; +} anno16_t; + +static double ttest(int n1, int n2, int a[4]) +{ + extern double kf_betai(double a, double b, double x); + double t, v, u1, u2; + if (n1 == 0 || n2 == 0 || n1 + n2 < 3) return 1.0; + u1 = (double)a[0] / n1; u2 = (double)a[2] / n2; + if (u1 <= u2) return 1.; + t = (u1 - u2) / sqrt(((a[1] - n1 * u1 * u1) + (a[3] - n2 * u2 * u2)) / (n1 + n2 - 2) * (1./n1 + 1./n2)); + v = n1 + n2 - 2; +// printf("%d,%d,%d,%d,%lf,%lf,%lf\n", a[0], a[1], a[2], a[3], t, u1, u2); + return t < 0.? 1. : .5 * kf_betai(.5*v, .5, v/(v+t*t)); +} + +static int test16_core(int anno[16], anno16_t *a) +{ + extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); + double left, right; + int i; + a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.; + memcpy(a->d, anno, 4 * sizeof(int)); + a->depth = anno[0] + anno[1] + anno[2] + anno[3]; + a->is_tested = (anno[0] + anno[1] > 0 && anno[2] + anno[3] > 0); + if (a->depth == 0) return -1; + a->mq = (int)(sqrt((anno[9] + anno[11]) / a->depth) + .499); + kt_fisher_exact(anno[0], anno[1], anno[2], anno[3], &left, &right, &a->p[0]); + for (i = 1; i < 4; ++i) + a->p[i] = ttest(anno[0] + anno[1], anno[2] + anno[3], anno+4*i); + return 0; +} + +static int test16(bcf1_t *b, anno16_t *a) +{ + char *p; + int i, anno[16]; + a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.; + a->d[0] = a->d[1] = a->d[2] = a->d[3] = 0.; + a->mq = a->depth = a->is_tested = 0; + if ((p = strstr(b->info, "I16=")) == 0) return -1; + p += 4; + for (i = 0; i < 16; ++i) { + anno[i] = strtol(p, &p, 10); + if (anno[i] == 0 && (errno == EINVAL || errno == ERANGE)) return -2; + ++p; + } + return test16_core(anno, a); +} + +static void rm_info(int n_smpl, bcf1_t *b, const char *key) +{ + char *p, *q; + if ((p = strstr(b->info, key)) == 0) return; + for (q = p; *q && *q != ';'; ++q); + if (p > b->info && *(p-1) == ';') --p; + memmove(p, q, b->l_str - (q - b->str)); + b->l_str -= q - p; + bcf_sync(n_smpl, b); +} + +static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag) +{ + kstring_t s; + int is_var = (pr->p_ref < pref); + double p_hwe, r = is_var? pr->p_ref : 1. - pr->p_ref; + anno16_t a; + + p_hwe = test_hwe(pr->g); + test16(b, &a); + rm_info(n_smpl, b, "I16="); + + memset(&s, 0, sizeof(kstring_t)); + kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); + if (is_var) { + kputs(b->alt, &s); + } + kputc('\0', &s); kputc('\0', &s); + kputs(b->info, &s); + if (b->info[0]) kputc(';', &s); + ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp); + ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); + if (a.is_tested) ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]); + if (p_hwe <= .2) ksprintf(&s, ";GC=%.2lf,%.2lf,%.2lf;HWE=%.3lf", pr->g[2], pr->g[1], pr->g[0], p_hwe); + kputc('\0', &s); + kputs(b->fmt, &s); kputc('\0', &s); + free(b->str); + b->m_str = s.m; b->l_str = s.l; b->str = s.s; + b->qual = r < 1e-100? 99 : -3.434 * log(r); + if (b->qual > 99) b->qual = 99; + bcf_sync(n_smpl, b); + return is_var; +} + +int bcfview(int argc, char *argv[]) +{ + bcf_t *bp, *bout = 0; + bcf1_t *b; + int c; + uint64_t n_processed = 0; + viewconf_t vc; + bcf_p1aux_t *p1 = 0; + bcf_hdr_t *h; + int tid, begin, end; + char moder[4], modew[4]; + khash_t(set64) *hash = 0; + + tid = begin = end = -1; + memset(&vc, 0, sizeof(viewconf_t)); + vc.prior_type = -1; vc.theta = 1e-3; vc.pref = 0.9; + while ((c = getopt(argc, argv, "l:cGvLbSuP:t:p:")) >= 0) { + switch (c) { + case 'l': vc.fn_list = strdup(optarg); break; + case 'G': vc.flag |= VC_NO_GENO; break; + case 'L': vc.flag |= VC_NO_PL; break; + case 'b': vc.flag |= VC_BCFOUT; break; + case 'S': vc.flag |= VC_VCFIN; break; + case 'c': vc.flag |= VC_CALL; break; + case 'v': vc.flag |= VC_VARONLY; break; + case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break; + case 't': vc.theta = atof(optarg); break; + case 'p': vc.pref = atof(optarg); break; + case 'P': + if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL; + else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2; + else if (strcmp(optarg, "flat") == 0) vc.prior_type = MC_PTYPE_FLAT; + else vc.prior_file = strdup(optarg); + break; + } + } + if (argc == optind) { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: bcftools view [options] [reg]\n\n"); + fprintf(stderr, "Options: -c SNP calling\n"); + fprintf(stderr, " -b output BCF instead of VCF\n"); + fprintf(stderr, " -u uncompressed BCF output\n"); + fprintf(stderr, " -S input is VCF\n"); + fprintf(stderr, " -G suppress all individual genotype information\n"); + fprintf(stderr, " -L discard the PL genotype field\n"); + fprintf(stderr, " -v output potential variant sites only\n"); + fprintf(stderr, " -l FILE list of sites to output [all sites]\n"); + fprintf(stderr, " -t FLOAT scaled mutation rate [%.4lg]\n", vc.theta); + fprintf(stderr, " -p FLOAT variant if P(ref|D)n_smpl); + if (vc.prior_file) { + if (bcf_p1_read_prior(p1, vc.prior_file) < 0) { + fprintf(stderr, "[%s] fail to read the prior AFS.\n", __func__); + return 1; + } + } else bcf_p1_init_prior(p1, vc.prior_type, vc.theta); + } + if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h); + if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) { + void *str2id = bcf_build_refhash(h); + if (bcf_parse_region(str2id, argv[optind+1], &tid, &begin, &end) >= 0) { + bcf_idx_t *idx; + idx = bcf_idx_load(argv[optind]); + if (idx) { + uint64_t off; + off = bcf_idx_query(idx, tid, begin, end); + bgzf_seek(bp->fp, off, SEEK_SET); + bcf_idx_destroy(idx); + } + } + } + while (vcf_read(bp, h, b) > 0) { + if (hash) { + uint64_t x = (uint64_t)b->tid<<32 | b->pos; + khint_t k = kh_get(set64, hash, x); + if (k == kh_end(hash)) continue; + } + if (tid >= 0) { + int l = strlen(b->ref); + l = b->pos + (l > 0? l : 1); + if (b->tid != tid || b->pos >= end) break; + if (!(l > begin && end > b->pos)) continue; + } + if (vc.flag & VC_CALL) { + bcf_p1rst_t pr; + bcf_p1_cal(b, p1, &pr); + if ((n_processed + 1) % 50000 == 0) bcf_p1_dump_afs(p1); + if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue; + update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag); + } + if (vc.flag & VC_NO_GENO) { + b->n_gi = 0; + b->fmt[0] = '\0'; + } + vcf_write(bout, h, b); + ++n_processed; + } + if (vc.prior_file) free(vc.prior_file); + if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); + bcf_hdr_destroy(h); + bcf_destroy(b); + vcf_close(bp); vcf_close(bout); + if (hash) kh_destroy(set64, hash); + if (vc.fn_list) free(vc.fn_list); + if (p1) bcf_p1_destroy(p1); + return 0; +} + diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 4611af1..e124b0f 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -32,7 +32,7 @@ unsigned char seq_nt4_table[256] = { }; struct __bcf_p1aux_t { - int n, M; + int n, M, n1; double *q2p, *pdg; // pdg -> P(D|g) double *phi; double *z, *zswap; // aux for afs @@ -100,6 +100,7 @@ bcf_p1aux_t *bcf_p1_init(int n) // FIXME: assuming diploid bcf_p1aux_t *ma; int i; ma = calloc(1, sizeof(bcf_p1aux_t)); + ma->n1 = -1; ma->n = n; ma->M = 2 * n; ma->q2p = calloc(256, sizeof(double)); ma->pdg = calloc(3 * ma->n, sizeof(double)); @@ -114,6 +115,13 @@ bcf_p1aux_t *bcf_p1_init(int n) // FIXME: assuming diploid return ma; } +int bcf_p1_set_n1(bcf_p1aux_t *b, int n1) +{ + if (n1 == 0 || n1 >= b->n) return -1; + b->n1 = n1; + return 0; +} + void bcf_p1_destroy(bcf_p1aux_t *ma) { if (ma) { diff --git a/bcftools/vcfout.c b/bcftools/vcfout.c deleted file mode 100644 index 75483e9..0000000 --- a/bcftools/vcfout.c +++ /dev/null @@ -1,299 +0,0 @@ -#include -#include -#include -#include -#include -#include "bcf.h" -#include "prob1.h" -#include "kstring.h" - -#include "khash.h" -KHASH_SET_INIT_INT64(set64) - -#include "kseq.h" -KSTREAM_INIT(gzFile, gzread, 16384) - -#define VC_NO_PL 1 -#define VC_NO_GENO 2 -#define VC_BCFOUT 4 -#define VC_CALL 8 -#define VC_VARONLY 16 -#define VC_VCFIN 32 -#define VC_UNCOMP 64 - -typedef struct { - int flag, prior_type; - char *fn_list, *prior_file; - double theta, pref; -} viewconf_t; - -khash_t(set64) *bcf_load_pos(const char *fn, bcf_hdr_t *_h) -{ - void *str2id; - gzFile fp; - kstream_t *ks; - int ret, dret, lineno = 1; - kstring_t *str; - khash_t(set64) *hash = 0; - - hash = kh_init(set64); - str2id = bcf_build_refhash(_h); - str = calloc(1, sizeof(kstring_t)); - fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); - ks = ks_init(fp); - while (ks_getuntil(ks, 0, str, &dret) >= 0) { - int tid = bcf_str2id(str2id, str->s); - if (tid >= 0 && dret != '\n') { - if (ks_getuntil(ks, 0, str, &dret) >= 0) { - uint64_t x = (uint64_t)tid<<32 | (atoi(str->s) - 1); - kh_put(set64, hash, x, &ret); - } else break; - } else fprintf(stderr, "[%s] %s is not a reference name (line %d).\n", __func__, str->s, lineno); - if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); - if (dret < 0) break; - ++lineno; - } - bcf_str2id_destroy(str2id); - ks_destroy(ks); - gzclose(fp); - free(str->s); free(str); - return hash; -} - -static double test_hwe(const double g[3]) -{ - extern double kf_gammaq(double p, double x); - double fexp, chi2, f[3], n; - int i; - n = g[0] + g[1] + g[2]; - fexp = (2. * g[2] + g[1]) / (2. * n); - if (fexp > 1. - 1e-10) fexp = 1. - 1e-10; - if (fexp < 1e-10) fexp = 1e-10; - f[0] = n * (1. - fexp) * (1. - fexp); - f[1] = n * 2. * fexp * (1. - fexp); - f[2] = n * fexp * fexp; - for (i = 0, chi2 = 0.; i < 3; ++i) - chi2 += (g[i] - f[i]) * (g[i] - f[i]) / f[i]; - return kf_gammaq(.5, chi2 / 2.); -} - -typedef struct { - double p[4]; - int mq, depth, is_tested, d[4]; -} anno16_t; - -static double ttest(int n1, int n2, int a[4]) -{ - extern double kf_betai(double a, double b, double x); - double t, v, u1, u2; - if (n1 == 0 || n2 == 0 || n1 + n2 < 3) return 1.0; - u1 = (double)a[0] / n1; u2 = (double)a[2] / n2; - if (u1 <= u2) return 1.; - t = (u1 - u2) / sqrt(((a[1] - n1 * u1 * u1) + (a[3] - n2 * u2 * u2)) / (n1 + n2 - 2) * (1./n1 + 1./n2)); - v = n1 + n2 - 2; -// printf("%d,%d,%d,%d,%lf,%lf,%lf\n", a[0], a[1], a[2], a[3], t, u1, u2); - return t < 0.? 1. : .5 * kf_betai(.5*v, .5, v/(v+t*t)); -} - -static int test16_core(int anno[16], anno16_t *a) -{ - extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); - double left, right; - int i; - a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.; - memcpy(a->d, anno, 4 * sizeof(int)); - a->depth = anno[0] + anno[1] + anno[2] + anno[3]; - a->is_tested = (anno[0] + anno[1] > 0 && anno[2] + anno[3] > 0); - if (a->depth == 0) return -1; - a->mq = (int)(sqrt((anno[9] + anno[11]) / a->depth) + .499); - kt_fisher_exact(anno[0], anno[1], anno[2], anno[3], &left, &right, &a->p[0]); - for (i = 1; i < 4; ++i) - a->p[i] = ttest(anno[0] + anno[1], anno[2] + anno[3], anno+4*i); - return 0; -} - -static int test16(bcf1_t *b, anno16_t *a) -{ - char *p; - int i, anno[16]; - a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.; - a->d[0] = a->d[1] = a->d[2] = a->d[3] = 0.; - a->mq = a->depth = a->is_tested = 0; - if ((p = strstr(b->info, "I16=")) == 0) return -1; - p += 4; - for (i = 0; i < 16; ++i) { - anno[i] = strtol(p, &p, 10); - if (anno[i] == 0 && (errno == EINVAL || errno == ERANGE)) return -2; - ++p; - } - return test16_core(anno, a); -} - -static void rm_info(int n_smpl, bcf1_t *b, const char *key) -{ - char *p, *q; - if ((p = strstr(b->info, key)) == 0) return; - for (q = p; *q && *q != ';'; ++q); - if (p > b->info && *(p-1) == ';') --p; - memmove(p, q, b->l_str - (q - b->str)); - b->l_str -= q - p; - bcf_sync(n_smpl, b); -} - -static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag) -{ - kstring_t s; - int is_var = (pr->p_ref < pref); - double p_hwe, r = is_var? pr->p_ref : 1. - pr->p_ref; - anno16_t a; - - p_hwe = test_hwe(pr->g); - test16(b, &a); - rm_info(n_smpl, b, "I16="); - - memset(&s, 0, sizeof(kstring_t)); - kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); - if (is_var) { - kputs(b->alt, &s); - } - kputc('\0', &s); kputc('\0', &s); - kputs(b->info, &s); - if (b->info[0]) kputc(';', &s); - ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp); - ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); - if (a.is_tested) ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]); - if (p_hwe <= .2) ksprintf(&s, ";GC=%.2lf,%.2lf,%.2lf;HWE=%.3lf", pr->g[2], pr->g[1], pr->g[0], p_hwe); - kputc('\0', &s); - kputs(b->fmt, &s); kputc('\0', &s); - free(b->str); - b->m_str = s.m; b->l_str = s.l; b->str = s.s; - b->qual = r < 1e-100? 99 : -3.434 * log(r); - if (b->qual > 99) b->qual = 99; - bcf_sync(n_smpl, b); - return is_var; -} - -int bcfview(int argc, char *argv[]) -{ - bcf_t *bp, *bout = 0; - bcf1_t *b; - int c; - uint64_t n_processed = 0; - viewconf_t vc; - bcf_p1aux_t *p1 = 0; - bcf_hdr_t *h; - int tid, begin, end; - char moder[4], modew[4]; - khash_t(set64) *hash = 0; - - tid = begin = end = -1; - memset(&vc, 0, sizeof(viewconf_t)); - vc.prior_type = -1; vc.theta = 1e-3; vc.pref = 0.9; - while ((c = getopt(argc, argv, "l:cGvLbSuP:t:p:")) >= 0) { - switch (c) { - case 'l': vc.fn_list = strdup(optarg); break; - case 'G': vc.flag |= VC_NO_GENO; break; - case 'L': vc.flag |= VC_NO_PL; break; - case 'b': vc.flag |= VC_BCFOUT; break; - case 'S': vc.flag |= VC_VCFIN; break; - case 'c': vc.flag |= VC_CALL; break; - case 'v': vc.flag |= VC_VARONLY; break; - case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break; - case 't': vc.theta = atof(optarg); break; - case 'p': vc.pref = atof(optarg); break; - case 'P': - if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL; - else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2; - else if (strcmp(optarg, "flat") == 0) vc.prior_type = MC_PTYPE_FLAT; - else vc.prior_file = strdup(optarg); - break; - } - } - if (argc == optind) { - fprintf(stderr, "\n"); - fprintf(stderr, "Usage: bcftools view [options] [reg]\n\n"); - fprintf(stderr, "Options: -c SNP calling\n"); - fprintf(stderr, " -b output BCF instead of VCF\n"); - fprintf(stderr, " -u uncompressed BCF output\n"); - fprintf(stderr, " -S input is VCF\n"); - fprintf(stderr, " -G suppress all individual genotype information\n"); - fprintf(stderr, " -L discard the PL genotype field\n"); - fprintf(stderr, " -v output potential variant sites only\n"); - fprintf(stderr, " -l FILE list of sites to output [all sites]\n"); - fprintf(stderr, " -t FLOAT scaled mutation rate [%.4lg]\n", vc.theta); - fprintf(stderr, " -p FLOAT variant if P(ref|D)n_smpl); - if (vc.prior_file) { - if (bcf_p1_read_prior(p1, vc.prior_file) < 0) { - fprintf(stderr, "[%s] fail to read the prior AFS.\n", __func__); - return 1; - } - } else bcf_p1_init_prior(p1, vc.prior_type, vc.theta); - } - if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h); - if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) { - void *str2id = bcf_build_refhash(h); - if (bcf_parse_region(str2id, argv[optind+1], &tid, &begin, &end) >= 0) { - bcf_idx_t *idx; - idx = bcf_idx_load(argv[optind]); - if (idx) { - uint64_t off; - off = bcf_idx_query(idx, tid, begin, end); - bgzf_seek(bp->fp, off, SEEK_SET); - bcf_idx_destroy(idx); - } - } - } - while (vcf_read(bp, h, b) > 0) { - if (hash) { - uint64_t x = (uint64_t)b->tid<<32 | b->pos; - khint_t k = kh_get(set64, hash, x); - if (k == kh_end(hash)) continue; - } - if (tid >= 0) { - int l = strlen(b->ref); - l = b->pos + (l > 0? l : 1); - if (b->tid != tid || b->pos >= end) break; - if (!(l > begin && end > b->pos)) continue; - } - if (vc.flag & VC_CALL) { - bcf_p1rst_t pr; - bcf_p1_cal(b, p1, &pr); - if ((n_processed + 1) % 50000 == 0) bcf_p1_dump_afs(p1); - if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue; - update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag); - } - if (vc.flag & VC_NO_GENO) { - b->n_gi = 0; - b->fmt[0] = '\0'; - } - vcf_write(bout, h, b); - ++n_processed; - } - if (vc.prior_file) free(vc.prior_file); - if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); - bcf_hdr_destroy(h); - bcf_destroy(b); - vcf_close(bp); vcf_close(bout); - if (hash) kh_destroy(set64, hash); - if (vc.fn_list) free(vc.fn_list); - if (p1) bcf_p1_destroy(p1); - return 0; -} -