typedef struct {
int flag, prior_type, n1, n_sub, *sublist;
- char *fn_list, *prior_file, **subsam;
+ char *fn_list, *prior_file, **subsam, *fn_dict;
double theta, pref, indel_frac;
} viewconf_t;
kstring_t s;
int dret, n = 0, max = 0;
char **sam = 0;
+ *_n = 0;
s.l = s.m = 0; s.s = 0;
fp = gzopen(fn, "r");
+ if (fp == 0) return 0; // fail to open file
ks = ks_init(fp);
while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
if (max == n) {
max = max? max<<1 : 4;
- sam = realloc(sam, sizeof(void*));
+ sam = realloc(sam, sizeof(void*)*max);
}
sam[n++] = strdup(s.s);
}
if (!strstr(str.s, "##INFO=<ID=PV4,"))
kputs("##INFO=<ID=PV4,Number=4,Type=Float,Description=\"P-values for strand bias, baseQ bias, mapQ bias and tail distance bias\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=INDEL,"))
- kputs("##INFO=<ID=INDEL,Number=0,Type=Flag,Descriptin=\"Indicates that the variant is an INDEL.\">\n", &str);
+ kputs("##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=GT,"))
kputs("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n", &str);
if (!strstr(str.s, "##FORMAT=<ID=GQ,"))
memset(&vc, 0, sizeof(viewconf_t));
vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.;
vc.n_sub = 0; vc.subsam = 0; vc.sublist = 0;
- while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:IMs:")) >= 0) {
+ while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:IMs:D:")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.fn_list = strdup(optarg); break;
+ case 'D': vc.fn_dict = strdup(optarg); break;
case 'N': vc.flag |= VC_ACGT_ONLY; break;
case 'G': vc.flag |= VC_NO_GENO; break;
case 'A': vc.flag |= VC_KEEPALT; break;
fprintf(stderr, " -Q output the QCALL likelihood format\n");
fprintf(stderr, " -L calculate LD for adjacent sites\n");
fprintf(stderr, " -I skip indels\n");
+ fprintf(stderr, " -D FILE sequence dictionary for VCF->BCF conversion [null]\n");
fprintf(stderr, " -1 INT number of group-1 samples [0]\n");
fprintf(stderr, " -l FILE list of sites to output [all sites]\n");
fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4g]\n", vc.theta);
return 1;
}
+ if ((vc.flag & VC_VCFIN) && (vc.flag & VC_BCFOUT) && vc.fn_dict == 0) {
+ fprintf(stderr, "[%s] For VCF->BCF conversion please specify the sequence dictionary with -D\n", __func__);
+ return 1;
+ }
b = calloc(1, sizeof(bcf1_t));
blast = calloc(1, sizeof(bcf1_t));
strcpy(moder, "r");
if (vc.flag & VC_UNCOMP) strcat(modew, "u");
bp = vcf_open(argv[optind], moder);
hin = hout = vcf_hdr_read(bp);
+ if (vc.fn_dict && (vc.flag & VC_VCFIN))
+ vcf_dictread(bp, hin, vc.fn_dict);
bout = vcf_open("-", modew);
if (!(vc.flag & VC_QCALL)) {
if (vc.n_sub) {
vcf_close(bp); vcf_close(bout);
if (hash) kh_destroy(set64, hash);
if (vc.fn_list) free(vc.fn_list);
+ if (vc.fn_dict) free(vc.fn_dict);
if (vc.n_sub) {
int i;
for (i = 0; i < vc.n_sub; ++i) free(vc.subsam[i]);