From f560d9a2a23f21b1376a186b04d99099ac5b07f9 Mon Sep 17 00:00:00 2001
From: Heng Li <lh3@live.co.uk>
Date: Tue, 16 Nov 2010 04:31:53 +0000
Subject: [PATCH]  * samtools-0.1.9-17 (r824)  * added command line options to
 change the default parameters in indel calling  * update the manual

---
 bam_plcmd.c      |  16 +-
 bamtk.c          |   2 +-
 bcftools/call1.c |  12 +-
 samtools.1       | 374 +++++++++++++++++++++++++----------------------
 4 files changed, 223 insertions(+), 181 deletions(-)

diff --git a/bam_plcmd.c b/bam_plcmd.c
index ad6134b..e3f73aa 100644
--- a/bam_plcmd.c
+++ b/bam_plcmd.c
@@ -532,9 +532,11 @@ int bam_pileup(int argc, char *argv[])
 #define MPLP_REALN   0x80
 #define MPLP_FMT_DP 0x100
 #define MPLP_FMT_SP 0x200
+#define MPLP_NO_INDEL 0x400
 
 typedef struct {
 	int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth;
+	int openQ, extQ, tandemQ;
 	char *reg, *fn_pos, *pl_list;
 	faidx_t *fai;
 	kh_64_t *hash;
@@ -698,6 +700,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
 		bca = bcf_call_init(-1., conf->min_baseQ);
 		bcr = calloc(sm->n, sizeof(bcf_callret1_t));
 		bca->rghash = rghash;
+		bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
 	}
 	ref_tid = -1; ref = 0;
 	iter = bam_mplp_init(n, mplp_func, (void**)data);
@@ -736,7 +739,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
 			bcf_write(bp, bh, b);
 			bcf_destroy(b);
 			// call indels
-			if (bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
+			if (!(conf->flag&MPLP_NO_INDEL) && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
 				for (i = 0; i < gplp.n; ++i)
 					bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
 				if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) {
@@ -853,8 +856,9 @@ int bam_mpileup(int argc, char *argv[])
 	mplp.min_baseQ = 13;
 	mplp.capQ_thres = 0;
 	mplp.max_depth = 250;
+	mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
 	mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
-	while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDSd:b:P:")) >= 0) {
+	while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDSd:b:P:o:e:h:I")) >= 0) {
 		switch (c) {
 		case 'f':
 			mplp.fai = fai_load(optarg);
@@ -872,11 +876,15 @@ int bam_mpileup(int argc, char *argv[])
 		case 'R': mplp.flag |= MPLP_REALN; break;
 		case 'D': mplp.flag |= MPLP_FMT_DP; break;
 		case 'S': mplp.flag |= MPLP_FMT_SP; break;
+		case 'I': mplp.flag |= MPLP_NO_INDEL; break;
 		case 'C': mplp.capQ_thres = atoi(optarg); break;
 		case 'M': mplp.max_mq = atoi(optarg); break;
 		case 'q': mplp.min_mq = atoi(optarg); break;
 		case 'Q': mplp.min_baseQ = atoi(optarg); break;
         case 'b': file_list = optarg; break;
+		case 'o': mplp.openQ = atoi(optarg); break;
+		case 'e': mplp.extQ = atoi(optarg); break;
+		case 'h': mplp.tandemQ = atoi(optarg); break;
 		}
 	}
 	if (argc == 1) {
@@ -891,11 +899,15 @@ int bam_mpileup(int argc, char *argv[])
 		fprintf(stderr, "         -q INT      filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
 		fprintf(stderr, "         -d INT      max per-sample depth [%d]\n", mplp.max_depth);
 		fprintf(stderr, "         -P STR      comma separated list of platforms for indels [all]\n");
+		fprintf(stderr, "         -o INT      Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ);
+		fprintf(stderr, "         -e INT      Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ);
+		fprintf(stderr, "         -h INT      coefficient for homopolyer errors [%d]\n", mplp.tandemQ);
 		fprintf(stderr, "         -g          generate BCF output\n");
 		fprintf(stderr, "         -u          do not compress BCF output\n");
 		fprintf(stderr, "         -B          disable BAQ computation\n");
 		fprintf(stderr, "         -D          output per-sample DP\n");
 		fprintf(stderr, "         -S          output per-sample SP (strand bias P-value, slow)\n");
+		fprintf(stderr, "         -I          do not perform indel calling\n");
 		fprintf(stderr, "\n");
 		fprintf(stderr, "Notes: Assuming diploid individuals.\n\n");
 		return 1;
diff --git a/bamtk.c b/bamtk.c
index ef11b06..7923b04 100644
--- a/bamtk.c
+++ b/bamtk.c
@@ -9,7 +9,7 @@
 #endif
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.9-16 (r823)"
+#define PACKAGE_VERSION "0.1.9-17 (r824)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);
diff --git a/bcftools/call1.c b/bcftools/call1.c
index 5a5c1cd..15f6f8b 100644
--- a/bcftools/call1.c
+++ b/bcftools/call1.c
@@ -29,7 +29,7 @@ KSTREAM_INIT(gzFile, gzread, 16384)
 typedef struct {
 	int flag, prior_type, n1;
 	char *fn_list, *prior_file;
-	double theta, pref;
+	double theta, pref, indel_frac;
 } viewconf_t;
 
 khash_t(set64) *bcf_load_pos(const char *fn, bcf_hdr_t *_h)
@@ -200,6 +200,7 @@ double bcf_ld_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
 int bcfview(int argc, char *argv[])
 {
 	extern int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b);
+	extern void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x);
 	bcf_t *bp, *bout = 0;
 	bcf1_t *b, *blast;
 	int c;
@@ -213,8 +214,8 @@ int bcfview(int argc, char *argv[])
 
 	tid = begin = end = -1;
 	memset(&vc, 0, sizeof(viewconf_t));
-	vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5;
-	while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgL")) >= 0) {
+	vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.;
+	while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:")) >= 0) {
 		switch (c) {
 		case '1': vc.n1 = atoi(optarg); break;
 		case 'l': vc.fn_list = strdup(optarg); break;
@@ -230,6 +231,7 @@ int bcfview(int argc, char *argv[])
 		case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break;
 		case 't': vc.theta = atof(optarg); break;
 		case 'p': vc.pref = atof(optarg); break;
+		case 'i': vc.indel_frac = atof(optarg); break;
 		case 'Q': vc.flag |= VC_QCALL; break;
 		case 'L': vc.flag |= VC_ADJLD; break;
 		case 'P':
@@ -257,7 +259,8 @@ int bcfview(int argc, char *argv[])
 		fprintf(stderr, "         -L        calculate LD for adjacent sites\n");
 		fprintf(stderr, "         -1 INT    number of group-1 samples [0]\n");
 		fprintf(stderr, "         -l FILE   list of sites to output [all sites]\n");
-		fprintf(stderr, "         -t FLOAT  scaled mutation rate [%.4lg]\n", vc.theta);
+		fprintf(stderr, "         -t FLOAT  scaled substitution mutation rate [%.4lg]\n", vc.theta);
+		fprintf(stderr, "         -i FLOAT  indel-to-substitution ratio [%.4lg]\n", vc.indel_frac);
 		fprintf(stderr, "         -p FLOAT  variant if P(ref|D)<FLOAT [%.3lg]\n", vc.pref);
 		fprintf(stderr, "         -P STR    type of prior: full, cond2, flat [full]\n");
 		fprintf(stderr, "\n");
@@ -287,6 +290,7 @@ int bcfview(int argc, char *argv[])
 			bcf_p1_set_n1(p1, vc.n1);
 			bcf_p1_init_subprior(p1, vc.prior_type, vc.theta);
 		}
+		if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac);
 	}
 	if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h);
 	if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) {
diff --git a/samtools.1 b/samtools.1
index 7de9857..17db537 100644
--- a/samtools.1
+++ b/samtools.1
@@ -1,4 +1,4 @@
-.TH samtools 1 "27 October 2010" "samtools-0.1.9" "Bioinformatics tools"
+.TH samtools 1 "15 November 2010" "samtools-0.1.10" "Bioinformatics tools"
 .SH NAME
 .PP
 samtools - Utilities for the Sequence Alignment/Map (SAM) format
@@ -20,7 +20,7 @@ samtools faidx ref.fasta
 .PP
 samtools pileup -vcf ref.fasta aln.sorted.bam
 .PP
-samtools mpileup -C50 -agf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
+samtools mpileup -C50 -gf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
 .PP
 samtools tview aln.sorted.bam ref.fasta
 
@@ -65,11 +65,11 @@ format: `chr2' (the whole chr2), `chr2:1000000' (region starting from
 .B -b
 Output in the BAM format.
 .TP
-.BI -f " INT"
+.BI -f \ INT
 Only output alignments with all bits in INT present in the FLAG
 field. INT can be in hex in the format of /^0x[0-9A-F]+/ [0]
 .TP
-.BI -F " INT"
+.BI -F \ INT
 Skip alignments with bits present in INT [0]
 .TP
 .B -h
@@ -78,19 +78,19 @@ Include the header in the output.
 .B -H
 Output the header only.
 .TP
-.BI -l " STR"
+.BI -l \ STR
 Only output reads in library STR [null]
 .TP
-.BI -o " FILE"
+.BI -o \ FILE
 Output file [stdout]
 .TP
-.BI -q " INT"
+.BI -q \ INT
 Skip alignments with MAPQ smaller than INT [0]
 .TP
-.BI -r " STR"
+.BI -r \ STR
 Only output reads in read group STR [null]
 .TP
-.BI -R " FILE"
+.BI -R \ FILE
 Output reads in read groups listed in
 .I FILE
 [null]
@@ -109,7 +109,7 @@ and
 .B `-q'
 , are taken into account.
 .TP
-.BI -t " FILE"
+.BI -t \ FILE
 This file is TAB-delimited. Each line must contain the reference name
 and the length of the reference, one line for each distinct reference;
 additional fields are ignored. This file also defines the order of the
@@ -135,135 +135,6 @@ press `?' for help and press `g' to check the alignment start from a
 region in the format like `chr10:10,000,000' or `=10,000,000' when
 viewing the same reference sequence.
 
-.TP
-.B pileup
-samtools pileup [-2sSBicv] [-f in.ref.fasta] [-t in.ref_list] [-l
-in.site_list] [-C capMapQ] [-M maxMapQ] [-T theta] [-N nHap] [-r
-pairDiffRate] [-m mask] [-d maxIndelDepth] [-G indelPrior]
-<in.bam>|<in.sam>
-
-Print the alignment in the pileup format. In the pileup format, each
-line represents a genomic position, consisting of chromosome name,
-coordinate, reference base, read bases, read qualities and alignment
-mapping qualities. Information on match, mismatch, indel, strand,
-mapping quality and start and end of a read are all encoded at the read
-base column. At this column, a dot stands for a match to the reference
-base on the forward strand, a comma for a match on the reverse strand,
-a '>' or '<' for a reference skip, `ACGTN' for a mismatch on the forward
-strand and `acgtn' for a mismatch on the reverse strand. A pattern
-`\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this
-reference position and the next reference position. The length of the
-insertion is given by the integer in the pattern, followed by the
-inserted sequence. Similarly, a pattern `-[0-9]+[ACGTNacgtn]+'
-represents a deletion from the reference. The deleted bases will be
-presented as `*' in the following lines. Also at the read base column, a
-symbol `^' marks the start of a read. The ASCII of the character
-following `^' minus 33 gives the mapping quality. A symbol `$' marks the
-end of a read segment.
-
-If option
-.B -c
-is applied, the consensus base, Phred-scaled consensus quality, SNP
-quality (i.e. the Phred-scaled probability of the consensus being
-identical to the reference) and root mean square (RMS) mapping quality
-of the reads covering the site will be inserted between the `reference
-base' and the `read bases' columns. An indel occupies an additional
-line. Each indel line consists of chromosome name, coordinate, a star,
-the genotype, consensus quality, SNP quality, RMS mapping quality, #
-covering reads, the first alllele, the second allele, # reads supporting
-the first allele, # reads supporting the second allele and # reads
-containing indels different from the top two alleles.
-
-The position of indels is offset by -1.
-
-.B OPTIONS:
-.RS
-.TP 10
-.B -B
-Disable the BAQ computation. See the
-.B mpileup
-command for details.
-.TP
-.B -c
-Call the consensus sequence using SOAPsnp consensus model. Options
-.BR -T ", " -N ", " -I " and " -r
-are only effective when
-.BR -c " or " -g
-is in use.
-.TP
-.BI -C " INT"
-Coefficient for downgrading the mapping quality of poorly mapped
-reads. See the
-.B mpileup
-command for details. [0]
-.TP
-.BI -d " INT"
-Use the first
-.I NUM
-reads in the pileup for indel calling for speed up. Zero for unlimited. [1024]
-.TP
-.BI -f " FILE"
-The reference sequence in the FASTA format. Index file
-.I FILE.fai
-will be created if
-absent.
-.TP
-.B -g
-Generate genotype likelihood in the binary GLFv3 format. This option
-suppresses -c, -i and -s. This option is deprecated by the
-.B mpileup
-command.
-.TP
-.B -i
-Only output pileup lines containing indels.
-.TP
-.BI -I " INT"
-Phred probability of an indel in sequencing/prep. [40]
-.TP
-.BI -l " FILE"
-List of sites at which pileup is output. This file is space
-delimited. The first two columns are required to be chromosome and
-1-based coordinate. Additional columns are ignored. It is
-recommended to use option
-.TP
-.BI -m " INT"
-Filter reads with flag containing bits in
-.I INT
-[1796]
-.TP
-.BI -M " INT"
-Cap mapping quality at INT [60]
-.TP
-.BI -N " INT"
-Number of haplotypes in the sample (>=2) [2]
-.TP
-.BI -r " FLOAT"
-Expected fraction of differences between a pair of haplotypes [0.001]
-.TP
-.B -s
-Print the mapping quality as the last column. This option makes the
-output easier to parse, although this format is not space efficient.
-.TP
-.B -S
-The input file is in SAM.
-.TP
-.BI -t " FILE"
-List of reference names ane sequence lengths, in the format described
-for the
-.B import
-command. If this option is present, samtools assumes the input
-.I <in.alignment>
-is in SAM format; otherwise it assumes in BAM format.
-.B -s
-together with
-.B -l
-as in the default format we may not know the mapping quality.
-.TP
-.BI -T " FLOAT"
-The theta parameter (error dependency coefficient) in the maq consensus
-calling model [0.85]
-.RE
-
 .TP
 .B mpileup
 samtools mpileup [-Bug] [-C capQcoef] [-r reg] [-f in.fa] [-l list] [-M capMapQ] [-Q minBaseQ] [-q minMapQ] in.bam [in2.bam [...]]
@@ -281,37 +152,64 @@ quality (BAQ). BAQ is the Phred-scaled probability of a read base being
 misaligned. Applying this option greatly helps to reduce false SNPs
 caused by misalignments.
 .TP
-.BI -C " INT"
+.BI -C \ INT
 Coefficient for downgrading mapping quality for reads containing
 excessive mismatches. Given a read with a phred-scaled probability q of
 being generated from the mapped position, the new mapping quality is
 about sqrt((INT-q)/INT)*INT. A zero value disables this
-functionality; if enabled, the recommended value is 50. [0]
+functionality; if enabled, the recommended value for BWA is 50. [0]
+.TP
+.BI -e \ INT
+Phred-scaled gap extension sequencing error probability. Reducing
+.I INT
+leads to longer indels. [20]
 .TP
-.BI -f " FILE"
+.BI -f \ FILE
 The reference file [null]
 .TP
 .B -g
 Compute genotype likelihoods and output them in the binary call format (BCF).
 .TP
-.B -u
-Similar to
-.B -g
-except that the output is uncompressed BCF, which is preferred for pipeing.
-.TP
-.BI -l " FILE"
+.BI -h \ INT
+Coefficient for modeling homopolymer errors. Given an
+.IR l -long
+homopolymer
+run, the sequencing error of an indel of size
+.I s
+is modeled as
+.IR INT * s / l .
+[100]
+.TP
+.BI -l \ FILE
 File containing a list of sites where pileup or BCF is outputted [null]
 .TP
-.BI -q " INT"
+.BI -o \ INT
+Phred-scaled gap open sequencing error probability. Reducing
+.I INT
+leads to more indel calls. [40]
+.TP
+.BI -P \ STR
+Comma dilimited list of platforms (determined by
+.BR @RG-PL )
+from which indel candidates are obtained. It is recommended to collect
+indel candidates from sequencing technologies that have low indel error
+rate such as ILLUMINA. [all]
+.TP
+.BI -q \ INT
 Minimum mapping quality for an alignment to be used [0]
 .TP
-.BI -Q " INT"
+.BI -Q \ INT
 Minimum base quality for a base to be considered [13]
 .TP
-.BI -r " STR"
+.BI -r \ STR
 Only generate pileup in region
 .I STR
 [all sites]
+.TP
+.B -u
+Similar to
+.B -g
+except that the output is uncompressed BCF, which is preferred for piping.
 .RE
 
 .TP
@@ -345,7 +243,7 @@ Output the final alignment to the standard output.
 .B -n
 Sort by read names rather than by chromosomal coordinates
 .TP
-.B -m INT
+.BI -m \ INT
 Approximately the maximum required memory. [500000000]
 .RE
 
@@ -368,7 +266,7 @@ and the headers of other files will be ignored.
 .B OPTIONS:
 .RS
 .TP 8
-.BI -h " FILE"
+.BI -h \ FILE
 Use the lines of
 .I FILE
 as `@' headers to be copied to
@@ -379,7 +277,7 @@ replacing any header lines that would otherwise be copied from
 is actually in SAM format, though any alignment records it may contain
 are ignored.)
 .TP
-.BI -R " STR"
+.BI -R \ STR
 Merge files in the specified region indicated by
 .I STR
 .TP
@@ -479,7 +377,7 @@ Output compressed BAM
 .B -S
 The input is SAM with header lines
 .TP
-.BI -C " INT"
+.BI -C \ INT
 Coefficient to cap mapping quality of poorly mapped reads. See the
 .B pileup
 command for details. [0]
@@ -489,6 +387,136 @@ Perform probabilistic realignment to compute BAQ, which will be used to
 cap base quality.
 .RE
 
+.TP
+.B pileup
+samtools pileup [-2sSBicv] [-f in.ref.fasta] [-t in.ref_list] [-l
+in.site_list] [-C capMapQ] [-M maxMapQ] [-T theta] [-N nHap] [-r
+pairDiffRate] [-m mask] [-d maxIndelDepth] [-G indelPrior]
+<in.bam>|<in.sam>
+
+Print the alignment in the pileup format. In the pileup format, each
+line represents a genomic position, consisting of chromosome name,
+coordinate, reference base, read bases, read qualities and alignment
+mapping qualities. Information on match, mismatch, indel, strand,
+mapping quality and start and end of a read are all encoded at the read
+base column. At this column, a dot stands for a match to the reference
+base on the forward strand, a comma for a match on the reverse strand,
+a '>' or '<' for a reference skip, `ACGTN' for a mismatch on the forward
+strand and `acgtn' for a mismatch on the reverse strand. A pattern
+`\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this
+reference position and the next reference position. The length of the
+insertion is given by the integer in the pattern, followed by the
+inserted sequence. Similarly, a pattern `-[0-9]+[ACGTNacgtn]+'
+represents a deletion from the reference. The deleted bases will be
+presented as `*' in the following lines. Also at the read base column, a
+symbol `^' marks the start of a read. The ASCII of the character
+following `^' minus 33 gives the mapping quality. A symbol `$' marks the
+end of a read segment.
+
+If option
+.B -c
+is applied, the consensus base, Phred-scaled consensus quality, SNP
+quality (i.e. the Phred-scaled probability of the consensus being
+identical to the reference) and root mean square (RMS) mapping quality
+of the reads covering the site will be inserted between the `reference
+base' and the `read bases' columns. An indel occupies an additional
+line. Each indel line consists of chromosome name, coordinate, a star,
+the genotype, consensus quality, SNP quality, RMS mapping quality, #
+covering reads, the first alllele, the second allele, # reads supporting
+the first allele, # reads supporting the second allele and # reads
+containing indels different from the top two alleles.
+
+.B NOTE:
+Since 0.1.10, the `pileup' command is deprecated by `mpileup'.
+
+.B OPTIONS:
+.RS
+.TP 10
+.B -B
+Disable the BAQ computation. See the
+.B mpileup
+command for details.
+.TP
+.B -c
+Call the consensus sequence. Options
+.BR -T ", " -N ", " -I " and " -r
+are only effective when
+.BR -c " or " -g
+is in use.
+.TP
+.BI -C \ INT
+Coefficient for downgrading the mapping quality of poorly mapped
+reads. See the
+.B mpileup
+command for details. [0]
+.TP
+.BI -d \ INT
+Use the first
+.I NUM
+reads in the pileup for indel calling for speed up. Zero for unlimited. [1024]
+.TP
+.BI -f \ FILE
+The reference sequence in the FASTA format. Index file
+.I FILE.fai
+will be created if
+absent.
+.TP
+.B -g
+Generate genotype likelihood in the binary GLFv3 format. This option
+suppresses -c, -i and -s. This option is deprecated by the
+.B mpileup
+command.
+.TP
+.B -i
+Only output pileup lines containing indels.
+.TP
+.BI -I \ INT
+Phred probability of an indel in sequencing/prep. [40]
+.TP
+.BI -l \ FILE
+List of sites at which pileup is output. This file is space
+delimited. The first two columns are required to be chromosome and
+1-based coordinate. Additional columns are ignored. It is
+recommended to use option
+.TP
+.BI -m \ INT
+Filter reads with flag containing bits in
+.I INT
+[1796]
+.TP
+.BI -M \ INT
+Cap mapping quality at INT [60]
+.TP
+.BI -N \ INT
+Number of haplotypes in the sample (>=2) [2]
+.TP
+.BI -r \ FLOAT
+Expected fraction of differences between a pair of haplotypes [0.001]
+.TP
+.B -s
+Print the mapping quality as the last column. This option makes the
+output easier to parse, although this format is not space efficient.
+.TP
+.B -S
+The input file is in SAM.
+.TP
+.BI -t \ FILE
+List of reference names ane sequence lengths, in the format described
+for the
+.B import
+command. If this option is present, samtools assumes the input
+.I <in.alignment>
+is in SAM format; otherwise it assumes in BAM format.
+.B -s
+together with
+.B -l
+as in the default format we may not know the mapping quality.
+.TP
+.BI -T \ FLOAT
+The theta parameter (error dependency coefficient) in the maq consensus
+calling model [0.85]
+.RE
+
 .SH SAM FORMAT
 
 SAM is TAB-delimited. Apart from the header lines, which are started
@@ -582,9 +610,8 @@ will be attached
 .IP o 2
 Call SNPs and short indels for one diploid individual:
 
-  samtools pileup -vcf ref.fa aln.bam > var.raw.plp
-  samtools.pl varFilter -D 100 var.raw.plp > var.flt.plp
-  awk '($3=="*"&&$6>=50)||($3!="*"&&$6>=20)' var.flt.plp > var.final.plp
+  samtools mpileup -ugf ref.fa aln.bam | bcftools view -bvcg - > var.raw.bcf
+  bcftools view var.raw.bcf | vcfutils.pl varFilter -D 100 > var.flt.vcf
 
 The
 .B -D
@@ -592,38 +619,37 @@ option of varFilter controls the maximum read depth, which should be
 adjusted to about twice the average read depth.  One may consider to add
 .B -C50
 to
-.B pileup
+.B mpileup
 if mapping quality is overestimated for reads containing excessive
 mismatches. Applying this option usually helps
 .B BWA-short
-but may not other mappers. It also potentially increases reference
-biases.
+but may not other mappers.
 
 .IP o 2
-Call SNPs (not short indels) for multiple diploid individuals:
+Call SNPs and short indels for multiple diploid individuals:
 
-  samtools mpileup -augf ref.fa *.bam | bcftools view -bcv - > snp.raw.bcf
-  bcftools view snp.raw.bcf | vcfutils.pl filter4vcf -D 2000 | bgzip > snp.flt.vcf.gz
+  samtools mpileup -P ILLUMINA -ugf ref.fa *.bam | bcftools view -bcvg - > var.raw.bcf
+  bcftools view var.raw.bcf | vcfutils.pl varFilter -D 2000 > var.flt.vcf
 
 Individuals are identified from the
 .B SM
 tags in the
 .B @RG
 header lines. Individuals can be pooled in one alignment file; one
-individual can also be separated into multiple files. In addition, one
-may consider to apply
-.B -C50
-to
-.BR mpileup .
-
-SNP calling with mpileup also works for single sample and has the
-advantage of enabling more powerful filtering. The drawback is the lack
-of short indel calling, which may be implemented in future.
+individual can also be separated into multiple files. The
+.B -P
+option specifies that indel candidates should be collected only from
+read groups with the
+.B @RG-PL
+tag set to
+.IR ILLUMINA .
+Collecting indel candidates from reads sequenced by an indel-prone
+technology may affect the performance of indel calling.
 
 .IP o 2
 Derive the allele frequency spectrum (AFS) on a list of sites from multiple individuals:
 
-  samtools mpileup -gf ref.fa *.bam > all.bcf
+  samtools mpileup -Igf ref.fa *.bam > all.bcf
   bcftools view -bl sites.list all.bcf > sites.bcf
   bcftools view -cGP cond2 sites.bcf > /dev/null 2> sites.1.afs
   bcftools view -cGP sites.1.afs sites.bcf > /dev/null 2> sites.2.afs
-- 
2.39.5