From: Heng Li Date: Sun, 3 Oct 2010 02:51:03 +0000 (+0000) Subject: * samtools-0.1.8-18 (r763) X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=dda27af89b70c52e075f6531d56c0bbccbc7246d * samtools-0.1.8-18 (r763) * added bcftools manual page * minor fix to mpileup and view command lines --- diff --git a/bam_plcmd.c b/bam_plcmd.c index af43918..c8ab7ba 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -461,7 +461,6 @@ int bam_pileup(int argc, char *argv[]) typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres; - double theta; char *reg, *fn_pos; faidx_t *fai; kh_64_t *hash; @@ -698,12 +697,10 @@ int bam_mpileup(int argc, char *argv[]) mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.max_mq = 60; - mplp.theta = 1e-3; mplp.min_baseQ = 13; mplp.capQ_thres = 0; - while ((c = getopt(argc, argv, "gf:r:l:M:q:t:Q:uaORC:")) >= 0) { + while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:")) >= 0) { switch (c) { - case 't': mplp.theta = atof(optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; @@ -711,7 +708,7 @@ int bam_mpileup(int argc, char *argv[]) case 'r': mplp.reg = strdup(optarg); break; case 'l': mplp.fn_pos = strdup(optarg); break; case 'g': mplp.flag |= MPLP_GLF; break; - case 'u': mplp.flag |= MPLP_NO_COMP; break; + case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break; case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break; case 'O': mplp.flag |= MPLP_NO_ORPHAN; break; case 'R': mplp.flag |= MPLP_REALN; break; @@ -730,7 +727,6 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq); fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ); fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq); - fprintf(stderr, " -t FLOAT scaled mutation rate [%lg]\n", mplp.theta); fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); fprintf(stderr, "\n"); diff --git a/bamtk.c b/bamtk.c index ddc7b61..a7644d0 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.8-17 (r760)" +#define PACKAGE_VERSION "0.1.8-18 (r763)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bcftools/bcftools.1 b/bcftools/bcftools.1 new file mode 100644 index 0000000..ebff301 --- /dev/null +++ b/bcftools/bcftools.1 @@ -0,0 +1,115 @@ +.TH bcftools 1 "2 October 2010" "bcftools" "Bioinformatics tools" +.SH NAME +.PP +bcftools - Utilities for the Binary Call Format (BCF) and VCF. +.SH SYNOPSIS +.PP +bcftools index in.bcf +.PP +bcftools view in.bcf chr2:100-200 > out.vcf +.PP +bcftools view -vc in.bcf > out.vcf 2> out.afs + +.SH DESCRIPTION +.PP +Bcftools is a toolkit for processing VCF/BCF files, calling variants and +estimating site allele frequencies and allele frequency spectrums. + +.SH COMMANDS AND OPTIONS + +.TP 10 +.B view +.B bcftools view +.RB [ \-cbuSAGgHvNQ ] +.RB [ \-1 +.IR nGroup1 ] +.RB [ \-l +.IR listFile ] +.RB [ \-t +.IR mutRate ] +.RB [ \-p +.IR varThres ] +.RB [ \-P +.IR prior ] +.I in.bcf +.RI [ region ] + +Convert between BCF and VCF, call variant candidates and estimate allele +frequencies. + +.B OPTIONS: +.RS +.TP 10 +.B -b +Output in the BCF format. The default is VCF. +.TP +.B -c +Call variants. +.TP +.B -v +Output variant sites only (force -c) +.TP +.B -g +Call per-sample genotypes at variant sites (force -c) +.TP +.B -u +Uncompressed BCF output (force -b). +.TP +.B -S +The input is VCF instead of BCF. +.TP +.B -A +Retain all possible alternate alleles at variant sites. By default, this +command discards unlikely alleles. +.TP +.B -G +Suppress all individual genotype information. +.TP +.B -H +Perform Hardy-Weiberg Equilibrium test. This will add computation time, sometimes considerably. +.TP +.B -N +Skip sites where the REF field is not A/C/G/T +.TP +.B -Q +Output the QCALL likelihood format +.TP +.BI "-1 " INT +Number of group-1 samples. This option is used for dividing input into +two groups for comparing. A zero value disables this functionality. [0] +.TP +.BI "-l " FILE +List of sites at which information are outputted [all sites] +.TP +.BI "-t " FLOAT +Scaled muttion rate for variant calling [0.001] +.TP +.BI "-p " FLOAT +A site is considered to be a variant if P(ref|D)= 0) { + while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:Qg")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; case 'l': vc.fn_list = strdup(optarg); break; case 'N': vc.flag |= VC_ACGT_ONLY; break; case 'G': vc.flag |= VC_NO_GENO; break; - case 'L': vc.flag |= VC_NO_PL; break; case 'A': vc.flag |= VC_KEEPALT; break; case 'b': vc.flag |= VC_BCFOUT; break; case 'S': vc.flag |= VC_VCFIN; break; case 'c': vc.flag |= VC_CALL; break; - case 'v': vc.flag |= VC_VARONLY; break; + case 'v': vc.flag |= VC_VARONLY | VC_CALL; break; case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break; case 'H': vc.flag |= VC_HWE; break; - case 'g': vc.flag |= VC_CALL_GT; break; + case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break; case 't': vc.theta = atof(optarg); break; case 'p': vc.pref = atof(optarg); break; case 'Q': vc.flag |= VC_QCALL; break; @@ -241,15 +239,14 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, "\n"); fprintf(stderr, "Usage: bcftools view [options] [reg]\n\n"); fprintf(stderr, "Options: -c SNP calling\n"); + fprintf(stderr, " -v output potential variant sites only (force -c)\n"); + fprintf(stderr, " -g call genotypes at variant sites (force -c)\n"); fprintf(stderr, " -b output BCF instead of VCF\n"); - fprintf(stderr, " -u uncompressed BCF output\n"); + fprintf(stderr, " -u uncompressed BCF output (force -b)\n"); fprintf(stderr, " -S input is VCF\n"); fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n"); fprintf(stderr, " -G suppress all individual genotype information\n"); - fprintf(stderr, " -g call genotypes for variant sites\n"); - fprintf(stderr, " -L discard the PL genotype field\n"); fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n"); - fprintf(stderr, " -v output potential variant sites only\n"); fprintf(stderr, " -N skip sites where REF is not A/C/G/T\n"); fprintf(stderr, " -Q output the QCALL likelihood format\n"); fprintf(stderr, " -1 INT number of group-1 samples [0]\n"); diff --git a/samtools.1 b/samtools.1 index d79d176..2bfeb43 100644 --- a/samtools.1 +++ b/samtools.1 @@ -1,4 +1,4 @@ -.TH samtools 1 "11 July 2010" "samtools-0.1.8" "Bioinformatics tools" +.TH samtools 1 "2 October 2010" "samtools-0.1.8" "Bioinformatics tools" .SH NAME .PP samtools - Utilities for the Sequence Alignment/Map (SAM) format @@ -20,7 +20,7 @@ samtools faidx ref.fasta .PP samtools pileup -f ref.fasta aln.sorted.bam .PP -samtools mpileup -f ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam +samtools mpileup -C50 -agf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam .PP samtools tview aln.sorted.bam ref.fasta @@ -249,21 +249,50 @@ Phred probability of an indel in sequencing/prep. [40] .TP .B mpileup -samtools mpileup [-r reg] [-f in.fa] in.bam [in2.bam [...]] +samtools mpileup [-aug] [-C coef] [-r reg] [-f in.fa] [-l list] [-M capMapQ] [-Q minBaseQ] [-q minMapQ] in.bam [in2.bam [...]] -Generate pileup for multiple BAM files. Consensus calling is not -implemented. +Generate BCF or pileup for one or multiple BAM files. Alignment records +are grouped by sample identifiers in @RG header lines. If sample +identifiers are absent, each input file is regarded as one sample. .B OPTIONS: .RS .TP 8 +.B -a +Perform HMM realignment to compute base alignment quality (BAQ). Base +quality will be capped by BAQ. +.TP +.B -g +Compute genotype likelihoods and output them in the binary call format (BCF). +.TP +.B -u +Similar to +.B -g +except that the output is uncompressed BCF, which is preferred for pipeing. +.TP +.B -C INT +Coefficient for downgrading mapping quality for reads containing +excessive mismatches. Given a read with a phred-scaled probability q of +being generated from the mapped position, the new mapping quality is +about sqrt((INT-q)/INT)*INT. A zero value disables this +functionality; if enabled, the recommended value is 50. [0] +.TP +.B -f FILE +The reference file [null] +.TP +.B -l FILE +File containing a list of sites where pileup or BCF is outputted [null] +.TP +.B -q INT +Minimum mapping quality for an alignment to be used [0] +.TP +.B -Q INT +Minimum base quality for a base to be considered [13] +.TP .B -r STR Only generate pileup in region .I STR [all sites] -.TP -.B -f FILE -The reference file [null] .RE .TP