typedef struct {
int max_mq, min_mq, flag, min_baseQ, capQ_thres;
- double theta;
char *reg, *fn_pos;
faidx_t *fai;
kh_64_t *hash;
mplp_conf_t mplp;
memset(&mplp, 0, sizeof(mplp_conf_t));
mplp.max_mq = 60;
- mplp.theta = 1e-3;
mplp.min_baseQ = 13;
mplp.capQ_thres = 0;
- while ((c = getopt(argc, argv, "gf:r:l:M:q:t:Q:uaORC:")) >= 0) {
+ while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:")) >= 0) {
switch (c) {
- case 't': mplp.theta = atof(optarg); break;
case 'f':
mplp.fai = fai_load(optarg);
if (mplp.fai == 0) return 1;
case 'r': mplp.reg = strdup(optarg); break;
case 'l': mplp.fn_pos = strdup(optarg); break;
case 'g': mplp.flag |= MPLP_GLF; break;
- case 'u': mplp.flag |= MPLP_NO_COMP; break;
+ case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break;
case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break;
case 'O': mplp.flag |= MPLP_NO_ORPHAN; break;
case 'R': mplp.flag |= MPLP_REALN; break;
fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq);
fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ);
fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
- fprintf(stderr, " -t FLOAT scaled mutation rate [%lg]\n", mplp.theta);
fprintf(stderr, " -g generate BCF output\n");
fprintf(stderr, " -u do not compress BCF output\n");
fprintf(stderr, "\n");
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.8-17 (r760)"
+#define PACKAGE_VERSION "0.1.8-18 (r763)"
#endif
int bam_taf2baf(int argc, char *argv[]);
--- /dev/null
+.TH bcftools 1 "2 October 2010" "bcftools" "Bioinformatics tools"
+.SH NAME
+.PP
+bcftools - Utilities for the Binary Call Format (BCF) and VCF.
+.SH SYNOPSIS
+.PP
+bcftools index in.bcf
+.PP
+bcftools view in.bcf chr2:100-200 > out.vcf
+.PP
+bcftools view -vc in.bcf > out.vcf 2> out.afs
+
+.SH DESCRIPTION
+.PP
+Bcftools is a toolkit for processing VCF/BCF files, calling variants and
+estimating site allele frequencies and allele frequency spectrums.
+
+.SH COMMANDS AND OPTIONS
+
+.TP 10
+.B view
+.B bcftools view
+.RB [ \-cbuSAGgHvNQ ]
+.RB [ \-1
+.IR nGroup1 ]
+.RB [ \-l
+.IR listFile ]
+.RB [ \-t
+.IR mutRate ]
+.RB [ \-p
+.IR varThres ]
+.RB [ \-P
+.IR prior ]
+.I in.bcf
+.RI [ region ]
+
+Convert between BCF and VCF, call variant candidates and estimate allele
+frequencies.
+
+.B OPTIONS:
+.RS
+.TP 10
+.B -b
+Output in the BCF format. The default is VCF.
+.TP
+.B -c
+Call variants.
+.TP
+.B -v
+Output variant sites only (force -c)
+.TP
+.B -g
+Call per-sample genotypes at variant sites (force -c)
+.TP
+.B -u
+Uncompressed BCF output (force -b).
+.TP
+.B -S
+The input is VCF instead of BCF.
+.TP
+.B -A
+Retain all possible alternate alleles at variant sites. By default, this
+command discards unlikely alleles.
+.TP
+.B -G
+Suppress all individual genotype information.
+.TP
+.B -H
+Perform Hardy-Weiberg Equilibrium test. This will add computation time, sometimes considerably.
+.TP
+.B -N
+Skip sites where the REF field is not A/C/G/T
+.TP
+.B -Q
+Output the QCALL likelihood format
+.TP
+.BI "-1 " INT
+Number of group-1 samples. This option is used for dividing input into
+two groups for comparing. A zero value disables this functionality. [0]
+.TP
+.BI "-l " FILE
+List of sites at which information are outputted [all sites]
+.TP
+.BI "-t " FLOAT
+Scaled muttion rate for variant calling [0.001]
+.TP
+.BI "-p " FLOAT
+A site is considered to be a variant if P(ref|D)<FLOAT [0.5]
+.TP
+.BI "-P " STR
+Prior or initial allele frequency spectrum. If STR can be
+.IR full ,
+.IR cond2 ,
+.I flat
+or the file consisting of error output from a previous variant calling
+run.
+.RE
+
+.TP
+.B index
+.B bcftools index
+.I in.bcf
+
+Index sorted BCF for random access.
+.RE
+
+.TP
+.B cat
+.B bcftools cat
+.I in1.bcf
+.RI [ "in2.bcf " [ ... "]]]"
+
+Concatenate BCF files. The input files are required to be sorted and
+have identical samples appearing in the same order.
+.RE
#include "kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
-#define VC_NO_PL 1
#define VC_NO_GENO 2
#define VC_BCFOUT 4
#define VC_CALL 8
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5;
- while ((c = getopt(argc, argv, "N1:l:cHAGvLbSuP:t:p:Qg")) >= 0) {
+ while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:Qg")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.fn_list = strdup(optarg); break;
case 'N': vc.flag |= VC_ACGT_ONLY; break;
case 'G': vc.flag |= VC_NO_GENO; break;
- case 'L': vc.flag |= VC_NO_PL; break;
case 'A': vc.flag |= VC_KEEPALT; break;
case 'b': vc.flag |= VC_BCFOUT; break;
case 'S': vc.flag |= VC_VCFIN; break;
case 'c': vc.flag |= VC_CALL; break;
- case 'v': vc.flag |= VC_VARONLY; break;
+ case 'v': vc.flag |= VC_VARONLY | VC_CALL; break;
case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break;
case 'H': vc.flag |= VC_HWE; break;
- case 'g': vc.flag |= VC_CALL_GT; break;
+ case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break;
case 't': vc.theta = atof(optarg); break;
case 'p': vc.pref = atof(optarg); break;
case 'Q': vc.flag |= VC_QCALL; break;
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bcftools view [options] <in.bcf> [reg]\n\n");
fprintf(stderr, "Options: -c SNP calling\n");
+ fprintf(stderr, " -v output potential variant sites only (force -c)\n");
+ fprintf(stderr, " -g call genotypes at variant sites (force -c)\n");
fprintf(stderr, " -b output BCF instead of VCF\n");
- fprintf(stderr, " -u uncompressed BCF output\n");
+ fprintf(stderr, " -u uncompressed BCF output (force -b)\n");
fprintf(stderr, " -S input is VCF\n");
fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n");
fprintf(stderr, " -G suppress all individual genotype information\n");
- fprintf(stderr, " -g call genotypes for variant sites\n");
- fprintf(stderr, " -L discard the PL genotype field\n");
fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n");
- fprintf(stderr, " -v output potential variant sites only\n");
fprintf(stderr, " -N skip sites where REF is not A/C/G/T\n");
fprintf(stderr, " -Q output the QCALL likelihood format\n");
fprintf(stderr, " -1 INT number of group-1 samples [0]\n");
-.TH samtools 1 "11 July 2010" "samtools-0.1.8" "Bioinformatics tools"
+.TH samtools 1 "2 October 2010" "samtools-0.1.8" "Bioinformatics tools"
.SH NAME
.PP
samtools - Utilities for the Sequence Alignment/Map (SAM) format
.PP
samtools pileup -f ref.fasta aln.sorted.bam
.PP
-samtools mpileup -f ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
+samtools mpileup -C50 -agf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
.PP
samtools tview aln.sorted.bam ref.fasta
.TP
.B mpileup
-samtools mpileup [-r reg] [-f in.fa] in.bam [in2.bam [...]]
+samtools mpileup [-aug] [-C coef] [-r reg] [-f in.fa] [-l list] [-M capMapQ] [-Q minBaseQ] [-q minMapQ] in.bam [in2.bam [...]]
-Generate pileup for multiple BAM files. Consensus calling is not
-implemented.
+Generate BCF or pileup for one or multiple BAM files. Alignment records
+are grouped by sample identifiers in @RG header lines. If sample
+identifiers are absent, each input file is regarded as one sample.
.B OPTIONS:
.RS
.TP 8
+.B -a
+Perform HMM realignment to compute base alignment quality (BAQ). Base
+quality will be capped by BAQ.
+.TP
+.B -g
+Compute genotype likelihoods and output them in the binary call format (BCF).
+.TP
+.B -u
+Similar to
+.B -g
+except that the output is uncompressed BCF, which is preferred for pipeing.
+.TP
+.B -C INT
+Coefficient for downgrading mapping quality for reads containing
+excessive mismatches. Given a read with a phred-scaled probability q of
+being generated from the mapped position, the new mapping quality is
+about sqrt((INT-q)/INT)*INT. A zero value disables this
+functionality; if enabled, the recommended value is 50. [0]
+.TP
+.B -f FILE
+The reference file [null]
+.TP
+.B -l FILE
+File containing a list of sites where pileup or BCF is outputted [null]
+.TP
+.B -q INT
+Minimum mapping quality for an alignment to be used [0]
+.TP
+.B -Q INT
+Minimum base quality for a base to be considered [13]
+.TP
.B -r STR
Only generate pileup in region
.I STR
[all sites]
-.TP
-.B -f FILE
-The reference file [null]
.RE
.TP