From: Heng Li <lh3@live.co.uk>
Date: Sun, 3 Oct 2010 02:51:03 +0000 (+0000)
Subject:  * samtools-0.1.8-18 (r763)
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=dda27af89b70c52e075f6531d56c0bbccbc7246d;p=samtools.git

 * samtools-0.1.8-18 (r763)
 * added bcftools manual page
 * minor fix to mpileup and view command lines
---

diff --git a/bam_plcmd.c b/bam_plcmd.c
index af43918..c8ab7ba 100644
--- a/bam_plcmd.c
+++ b/bam_plcmd.c
@@ -461,7 +461,6 @@ int bam_pileup(int argc, char *argv[])
 
 typedef struct {
 	int max_mq, min_mq, flag, min_baseQ, capQ_thres;
-	double theta;
 	char *reg, *fn_pos;
 	faidx_t *fai;
 	kh_64_t *hash;
@@ -698,12 +697,10 @@ int bam_mpileup(int argc, char *argv[])
 	mplp_conf_t mplp;
 	memset(&mplp, 0, sizeof(mplp_conf_t));
 	mplp.max_mq = 60;
-	mplp.theta = 1e-3;
 	mplp.min_baseQ = 13;
 	mplp.capQ_thres = 0;
-	while ((c = getopt(argc, argv, "gf:r:l:M:q:t:Q:uaORC:")) >= 0) {
+	while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:")) >= 0) {
 		switch (c) {
-		case 't': mplp.theta = atof(optarg); break;
 		case 'f':
 			mplp.fai = fai_load(optarg);
 			if (mplp.fai == 0) return 1;
@@ -711,7 +708,7 @@ int bam_mpileup(int argc, char *argv[])
 		case 'r': mplp.reg = strdup(optarg); break;
 		case 'l': mplp.fn_pos = strdup(optarg); break;
 		case 'g': mplp.flag |= MPLP_GLF; break;
-		case 'u': mplp.flag |= MPLP_NO_COMP; break;
+		case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break;
 		case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break;
 		case 'O': mplp.flag |= MPLP_NO_ORPHAN; break;
 		case 'R': mplp.flag |= MPLP_REALN; break;
@@ -730,7 +727,6 @@ int bam_mpileup(int argc, char *argv[])
 		fprintf(stderr, "         -M INT      cap mapping quality at INT [%d]\n", mplp.max_mq);
 		fprintf(stderr, "         -Q INT      min base quality [%d]\n", mplp.min_baseQ);
 		fprintf(stderr, "         -q INT      filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
-		fprintf(stderr, "         -t FLOAT    scaled mutation rate [%lg]\n", mplp.theta);
 		fprintf(stderr, "         -g          generate BCF output\n");
 		fprintf(stderr, "         -u          do not compress BCF output\n");
 		fprintf(stderr, "\n");
diff --git a/bamtk.c b/bamtk.c
index ddc7b61..a7644d0 100644
--- a/bamtk.c
+++ b/bamtk.c
@@ -9,7 +9,7 @@
 #endif
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.8-17 (r760)"
+#define PACKAGE_VERSION "0.1.8-18 (r763)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);
diff --git a/bcftools/bcftools.1 b/bcftools/bcftools.1
new file mode 100644
index 0000000..ebff301
--- /dev/null
+++ b/bcftools/bcftools.1
@@ -0,0 +1,115 @@
+.TH bcftools 1 "2 October 2010" "bcftools" "Bioinformatics tools"
+.SH NAME
+.PP
+bcftools - Utilities for the Binary Call Format (BCF) and VCF.
+.SH SYNOPSIS
+.PP
+bcftools index in.bcf
+.PP
+bcftools view in.bcf chr2:100-200 > out.vcf
+.PP
+bcftools view -vc in.bcf > out.vcf 2> out.afs
+
+.SH DESCRIPTION
+.PP
+Bcftools is a toolkit for processing VCF/BCF files, calling variants and
+estimating site allele frequencies and allele frequency spectrums.
+
+.SH COMMANDS AND OPTIONS
+
+.TP 10
+.B view
+.B bcftools view
+.RB [ \-cbuSAGgHvNQ ]
+.RB [ \-1
+.IR nGroup1 ]
+.RB [ \-l
+.IR listFile ]
+.RB [ \-t
+.IR mutRate ]
+.RB [ \-p
+.IR varThres ]
+.RB [ \-P
+.IR prior ]
+.I in.bcf
+.RI [ region ]
+
+Convert between BCF and VCF, call variant candidates and estimate allele
+frequencies.
+
+.B OPTIONS:
+.RS
+.TP 10
+.B -b
+Output in the BCF format. The default is VCF.
+.TP
+.B -c
+Call variants.
+.TP
+.B -v
+Output variant sites only (force -c)
+.TP
+.B -g
+Call per-sample genotypes at variant sites (force -c)
+.TP
+.B -u
+Uncompressed BCF output (force -b).
+.TP
+.B -S
+The input is VCF instead of BCF.
+.TP
+.B -A
+Retain all possible alternate alleles at variant sites. By default, this
+command discards unlikely alleles.
+.TP
+.B -G
+Suppress all individual genotype information.
+.TP
+.B -H
+Perform Hardy-Weiberg Equilibrium test. This will add computation time, sometimes considerably.
+.TP
+.B -N
+Skip sites where the REF field is not A/C/G/T
+.TP
+.B -Q
+Output the QCALL likelihood format
+.TP
+.BI "-1 " INT
+Number of group-1 samples. This option is used for dividing input into
+two groups for comparing. A zero value disables this functionality. [0]
+.TP
+.BI "-l " FILE
+List of sites at which information are outputted [all sites]
+.TP
+.BI "-t " FLOAT
+Scaled muttion rate for variant calling [0.001]
+.TP
+.BI "-p " FLOAT
+A site is considered to be a variant if P(ref|D)<FLOAT [0.5]
+.TP
+.BI "-P " STR
+Prior or initial allele frequency spectrum. If STR can be
+.IR full ,
+.IR cond2 ,
+.I flat
+or the file consisting of error output from a previous variant calling
+run.
+.RE
+
+.TP
+.B index
+.B bcftools index
+.I in.bcf
+
+Index sorted BCF for random access.
+.RE
+
+.TP
+.B cat
+.B bcftools cat
+.I in1.bcf
+.RI [ "in2.bcf " [ ... "]]]"
+
+Concatenate BCF files. The input files are required to be sorted and
+have identical samples appearing in the same order.
+.RE
diff --git a/bcftools/call1.c b/bcftools/call1.c
index a5c03f4..1eacd4f 100644
--- a/bcftools/call1.c
+++ b/bcftools/call1.c
@@ -13,7 +13,6 @@ KHASH_SET_INIT_INT64(set64)
 #include "kseq.h"
 KSTREAM_INIT(gzFile, gzread, 16384)
 
-#define VC_NO_PL   1
 #define VC_NO_GENO 2
 #define VC_BCFOUT  4
 #define VC_CALL    8
@@ -211,21 +210,20 @@ int bcfview(int argc, char *argv[])
 	tid = begin = end = -1;
 	memset(&vc, 0, sizeof(viewconf_t));
 	vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5;
-	while ((c = getopt(argc, argv, "N1:l:cHAGvLbSuP:t:p:Qg")) >= 0) {
+	while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:Qg")) >= 0) {
 		switch (c) {
 		case '1': vc.n1 = atoi(optarg); break;
 		case 'l': vc.fn_list = strdup(optarg); break;
 		case 'N': vc.flag |= VC_ACGT_ONLY; break;
 		case 'G': vc.flag |= VC_NO_GENO; break;
-		case 'L': vc.flag |= VC_NO_PL; break;
 		case 'A': vc.flag |= VC_KEEPALT; break;
 		case 'b': vc.flag |= VC_BCFOUT; break;
 		case 'S': vc.flag |= VC_VCFIN; break;
 		case 'c': vc.flag |= VC_CALL; break;
-		case 'v': vc.flag |= VC_VARONLY; break;
+		case 'v': vc.flag |= VC_VARONLY | VC_CALL; break;
 		case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break;
 		case 'H': vc.flag |= VC_HWE; break;
-		case 'g': vc.flag |= VC_CALL_GT; break;
+		case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break;
 		case 't': vc.theta = atof(optarg); break;
 		case 'p': vc.pref = atof(optarg); break;
 		case 'Q': vc.flag |= VC_QCALL; break;
@@ -241,15 +239,14 @@ int bcfview(int argc, char *argv[])
 		fprintf(stderr, "\n");
 		fprintf(stderr, "Usage:   bcftools view [options] <in.bcf> [reg]\n\n");
 		fprintf(stderr, "Options: -c        SNP calling\n");
+		fprintf(stderr, "         -v        output potential variant sites only (force -c)\n");
+		fprintf(stderr, "         -g        call genotypes at variant sites (force -c)\n");
 		fprintf(stderr, "         -b        output BCF instead of VCF\n");
-		fprintf(stderr, "         -u        uncompressed BCF output\n");
+		fprintf(stderr, "         -u        uncompressed BCF output (force -b)\n");
 		fprintf(stderr, "         -S        input is VCF\n");
 		fprintf(stderr, "         -A        keep all possible alternate alleles at variant sites\n");
 		fprintf(stderr, "         -G        suppress all individual genotype information\n");
-		fprintf(stderr, "         -g        call genotypes for variant sites\n");
-		fprintf(stderr, "         -L        discard the PL genotype field\n");
 		fprintf(stderr, "         -H        perform Hardy-Weinberg test (slower)\n");
-		fprintf(stderr, "         -v        output potential variant sites only\n");
 		fprintf(stderr, "         -N        skip sites where REF is not A/C/G/T\n");
 		fprintf(stderr, "         -Q        output the QCALL likelihood format\n");
 		fprintf(stderr, "         -1 INT    number of group-1 samples [0]\n");
diff --git a/samtools.1 b/samtools.1
index d79d176..2bfeb43 100644
--- a/samtools.1
+++ b/samtools.1
@@ -1,4 +1,4 @@
-.TH samtools 1 "11 July 2010" "samtools-0.1.8" "Bioinformatics tools"
+.TH samtools 1 "2 October 2010" "samtools-0.1.8" "Bioinformatics tools"
 .SH NAME
 .PP
 samtools - Utilities for the Sequence Alignment/Map (SAM) format
@@ -20,7 +20,7 @@ samtools faidx ref.fasta
 .PP
 samtools pileup -f ref.fasta aln.sorted.bam
 .PP
-samtools mpileup -f ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
+samtools mpileup -C50 -agf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
 .PP
 samtools tview aln.sorted.bam ref.fasta
 
@@ -249,21 +249,50 @@ Phred probability of an indel in sequencing/prep. [40]
 
 .TP
 .B mpileup
-samtools mpileup [-r reg] [-f in.fa] in.bam [in2.bam [...]]
+samtools mpileup [-aug] [-C coef] [-r reg] [-f in.fa] [-l list] [-M capMapQ] [-Q minBaseQ] [-q minMapQ] in.bam [in2.bam [...]]
 
-Generate pileup for multiple BAM files. Consensus calling is not
-implemented.
+Generate BCF or pileup for one or multiple BAM files. Alignment records
+are grouped by sample identifiers in @RG header lines. If sample
+identifiers are absent, each input file is regarded as one sample.
 
 .B OPTIONS:
 .RS
 .TP 8
+.B -a
+Perform HMM realignment to compute base alignment quality (BAQ). Base
+quality will be capped by BAQ.
+.TP
+.B -g
+Compute genotype likelihoods and output them in the binary call format (BCF).
+.TP
+.B -u
+Similar to
+.B -g
+except that the output is uncompressed BCF, which is preferred for pipeing.
+.TP
+.B -C INT
+Coefficient for downgrading mapping quality for reads containing
+excessive mismatches. Given a read with a phred-scaled probability q of
+being generated from the mapped position, the new mapping quality is
+about sqrt((INT-q)/INT)*INT. A zero value disables this
+functionality; if enabled, the recommended value is 50. [0]
+.TP
+.B -f FILE
+The reference file [null]
+.TP
+.B -l FILE
+File containing a list of sites where pileup or BCF is outputted [null]
+.TP
+.B -q INT
+Minimum mapping quality for an alignment to be used [0]
+.TP
+.B -Q INT
+Minimum base quality for a base to be considered [13]
+.TP
 .B -r STR
 Only generate pileup in region
 .I STR
 [all sites]
-.TP
-.B -f FILE
-The reference file [null]
 .RE
 
 .TP