Updated samtools to 0.1.19

[rsem.git] / sam / samtools.1
diff --git a/sam/samtools.1 b/sam/samtools.1

index 98ce9d04d92d3f38c36ef8809962ea155359c38f..5923abd52608ee9003cbe17c49e93755f67a647f 100644 (file)
--- a/sam/samtools.1
+++ b/sam/samtools.1
@@ -1,4 +1,4 @@
-.TH samtools 1 "05 July 2011" "samtools-0.1.17" "Bioinformatics tools"
+.TH samtools 1 "15 March 2013" "samtools-0.1.19" "Bioinformatics tools"
  .SH NAME
  .PP
  samtools - Utilities for the Sequence Alignment/Map (SAM) format
@@ -30,7 +30,7 @@ bcftools index in.bcf
  .PP
  bcftools view in.bcf chr2:100-200 > out.vcf
  .PP
-bcftools view -vc in.bcf > out.vcf 2> out.afs
+bcftools view -Nvm0.99 in.bcf > out.vcf 2> out.afs
  
  .SH DESCRIPTION
  .PP
@@ -69,7 +69,7 @@ format: `chr2' (the whole chr2), `chr2:1000000' (region starting from
  
  .B OPTIONS:
  .RS
-.TP 8
+.TP 10
  .B -b
  Output in the BAM format.
  .TP
@@ -103,6 +103,10 @@ Output reads in read groups listed in
  .I FILE
  [null]
  .TP
+.BI -s \ FLOAT
+Fraction of templates/pairs to subsample; the integer part is treated as the
+seed for the random number generator [-1]
+.TP
  .B -S
  Input is in SAM. If @SQ header lines are absent, the
  .B `-t'
@@ -136,17 +140,38 @@ to another samtools command.
  
  .TP
  .B tview
-samtools tview <in.sorted.bam> [ref.fasta]
+samtools tview 
+.RB [ \-p 
+.IR chr:pos ]
+.RB [ \-s 
+.IR STR ]
+.RB [ \-d 
+.IR display ] 
+.RI <in.sorted.bam> 
+.RI [ref.fasta]
  
  Text alignment viewer (based on the ncurses library). In the viewer,
  press `?' for help and press `g' to view the alignment starting from a
  region in a format like `chr10:10,000,000' or `=10,000,000' when
  viewing the same reference sequence.
  
+.B Options:
+.RS
+.TP 14
+.BI -d \ display
+Output as (H)tml or (C)urses or (T)ext
+.TP
+.BI -p \ chr:pos
+Go directly to this position
+.TP
+.BI -s \ STR
+Display only reads from this sample or read group
+.RE
+
  .TP
  .B mpileup
-.B samtools mpileup
-.RB [ \-EBug ]
+samtools mpileup
+.RB [ \-EBugp ]
  .RB [ \-C
  .IR capQcoef ]
  .RB [ \-r
@@ -293,6 +318,10 @@ Phred-scaled gap open sequencing error probability. Reducing
  .I INT
  leads to more indel calls. [40]
  .TP
+.BI -p
+Apply -m and -F thresholds per sample to increase sensitivity of calling.
+By default both options are applied to reads pooled from all samples.
+.TP
  .BI -P \ STR
  Comma-delimited list of platforms (determined by
  .BR @RG-PL )
@@ -324,7 +353,7 @@ which enables fast BAM concatenation.
  
  .TP
  .B sort
-samtools sort [-no] [-m maxMem] <in.bam> <out.prefix>
+samtools sort [-nof] [-m maxMem] <in.bam> <out.prefix>
  
  Sort alignments by leftmost coordinates. File
  .I <out.prefix>.bam
@@ -342,6 +371,13 @@ Output the final alignment to the standard output.
  .B -n
  Sort by read names rather than by chromosomal coordinates
  .TP
+.B -f
+Use
+.I <out.prefix>
+as the full output path and do not append
+.I .bam
+suffix.
+.TP
  .BI -m \ INT
  Approximately the maximum required memory. [500000000]
  .RE
@@ -566,6 +602,8 @@ Minimum base quality to be used in het calling. [13]
  .IR mutRate ]
  .RB [ \-p
  .IR varThres ]
+.RB [ \-m
+.IR varThres ]
  .RB [ \-P
  .IR prior ]
  .RB [ \-1
@@ -648,6 +686,12 @@ Call per-sample genotypes at variant sites (force -c)
  .BI -i \ FLOAT
  Ratio of INDEL-to-SNP mutation rate [0.15]
  .TP
+.BI -m \ FLOAT
+New model for improved multiallelic and rare-variant calling. Another
+ALT allele is accepted if P(chi^2) of LRT exceeds the FLOAT threshold. The 
+parameter seems robust and the actual value usually does not affect the results
+much; a good value to use is 0.99. This is the recommended calling method. [0]
+.TP
  .BI -p \ FLOAT
  A site is considered to be a variant if P(ref|D)<FLOAT [0.5]
  .TP
@@ -807,6 +851,9 @@ RP  int     # permutations yielding a smaller PCHI2
  CLR    int     Phred log ratio of genotype likelihoods with and without the trio/pair constraint
  UGT    string  Most probable genotype configuration without the trio constraint
  CGT    string  Most probable configuration with the trio constraint
+VDB    float   Tests variant positions within reads. Intended for filtering RNA-seq artifacts around splice sites
+RPB    float   Mann-Whitney rank-sum test for tail distance bias
+HWE    float   Hardy-Weinberg equilibrium test, Wigginton et al., PMID: 15789306
  .TE
  
  .SH EXAMPLES
@@ -934,6 +981,25 @@ tag set to
  Collecting indel candidates from reads sequenced by an indel-prone
  technology may affect the performance of indel calling.
  
+Note that there is a new calling model which can be invoked by
+
+    bcftools view -m0.99  ...
+
+which fixes some severe limitations of the default method.
+
+For filtering, best results seem to be achieved by first applying the
+.IR SnpGap
+filter and then applying some machine-learning approach:
+
+    vcf-annotate -f SnpGap=n
+    vcf filter ...
+
+Both can be found in the 
+.B vcftools
+and
+.B htslib
+packages (links below).
+
  .IP o 2
  Derive the allele frequency spectrum (AFS) on a list of sites from multiple individuals:
  
@@ -992,3 +1058,9 @@ specification.
  .SH SEE ALSO
  .PP
  Samtools website: <http://samtools.sourceforge.net>
+.br
+Samtools latest source: <https://github.com/samtools/samtools>
+.br
+VCFtools website with stable link to VCF specification: <http://vcftools.sourceforge.net>
+.br
+HTSlib website: <https://github.com/samtools/htslib>