* allow to change the compression level in BAM output

author Heng Li <lh3@live.co.uk>

Fri, 18 Mar 2011 20:33:51 +0000 (20:33 +0000)

committer Heng Li <lh3@live.co.uk>

Fri, 18 Mar 2011 20:33:51 +0000 (20:33 +0000)
author Heng Li <lh3@live.co.uk>
Fri, 18 Mar 2011 20:33:51 +0000 (20:33 +0000)
committer Heng Li <lh3@live.co.uk>
Fri, 18 Mar 2011 20:33:51 +0000 (20:33 +0000)
diff --git a/Makefile b/Makefile

index af93d9ca5fb9010943877317bdb64b88d65e4ea1..f0259422e289216096a71bf5cf5bad5fc1aab9e6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ DFLAGS=         -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_
  KNETFILE_O=    knetfile.o
  LOBJS=         bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
                         bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o \
-                       $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o
+                       $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bam_cat.o
  AOBJS=         bam_tview.o bam_maqcns.o bam_plcmd.o sam_view.o \
                         bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o     \
                         bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \
diff --git a/NEWS b/NEWS

index 8455b484a0ce46bac6ca004e8ef2e2fb6e8239cf..946ba7b4e98d584081b846105bde5f438f6e3ba1 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,28 @@
+Beta release 0.1.14 (16 March, 2011)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This release implements a method for testing associations for case-control
+data. The method does not call genotypes but instead sums over all genotype
+configurations to compute a chi^2 based test statistics. It can be potentially
+applied to comparing a pair of samples (e.g. a tumor-normal pair), but this
+has not been evaluated on real data.
+
+Another new feature is to make X chromosome variant calls when female and male
+samples are both present. The user needs to provide a file indicating the
+ploidy of each sample.
+
+Other new features:
+
+ * Added `samtools mpileup -L' to skip INDEL calling in regions with
+   excessively high coverage. Such regions dramatically slow down mpileup.
+
+ * Added `bcftools view -F' to parse BCF files generated by samtools r921 or
+   older which encode PL in a different way.
+
+(0.1.14: 16 March 2011, r930:163)
+
+
+
  Beta release 0.1.13 (1 March, 2011)
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
@@ -56,7 +81,7 @@ Other notable changes in bcftools:
   * Updated the BCF spec.
  
   * Added the `FQ' VCF INFO field, which gives the phred-scaled probability
-   of all samples being the samei (identical to the reference or all homozygous
+   of all samples being the same (identical to the reference or all homozygous
     variants). Option `view -f' has been dropped.
  
   * Implementated of "vcfutils.pl vcf2fq" to generate a consensus sequence
diff --git a/bam.h b/bam.h

index 4ad264447a94e11b63aab2ba9fba7501bdb2203c..84253ae851decf05d1675b33054d8c41f3b3e6b6 100644 (file)
--- a/bam.h
+++ b/bam.h
@@ -40,7 +40,7 @@
    @copyright Genome Research Ltd.
   */
  
-#define BAM_VERSION "0.1.13 (r926:134)"
+#define BAM_VERSION "0.1.13 (r926:161)"
  
  #include <stdint.h>
  #include <stdlib.h>
diff --git a/bam_plcmd.c b/bam_plcmd.c

index bba62cbcdcabe52cbdba71ab71a6b2e5681aa032..368502cd44fe73dc5ceb8ad6210fc2432fe49678 100644 (file)
--- a/bam_plcmd.c
+++ b/bam_plcmd.c
@@ -536,7 +536,7 @@ int bam_pileup(int argc, char *argv[])
  #define MPLP_EXT_BAQ 0x800
  
  typedef struct {
-       int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth;
+       int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth;
         int openQ, extQ, tandemQ, min_support; // for indels
         double min_frac; // for indels
         char *reg, *fn_pos, *pl_list;
@@ -617,7 +617,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
         extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
         extern void bcf_call_del_rghash(void *rghash);
         mplp_aux_t **data;
-       int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid, max_depth;
+       int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid, max_depth, max_indel_depth;
         const bam_pileup1_t **plp;
         bam_mplp_t iter;
         bam_header_t *h = 0;
@@ -722,6 +722,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                 max_depth = 8000 / sm->n;
                 fprintf(stderr, "<%s> Set max per-sample depth to %d\n", __func__, max_depth);
         }
+       max_indel_depth = conf->max_indel_depth * sm->n;
         bam_mplp_set_maxcnt(iter, max_depth);
         while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
                 if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
@@ -737,8 +738,9 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                         ref_tid = tid;
                 }
                 if (conf->flag & MPLP_GLF) {
-                       int _ref0, ref16;
+                       int total_depth, _ref0, ref16;
                         bcf1_t *b = calloc(1, sizeof(bcf1_t));
+                       for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
                         group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp);
                         _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
                         ref16 = bam_nt16_table[_ref0];
@@ -750,7 +752,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
                         bcf_write(bp, bh, b);
                         bcf_destroy(b);
                         // call indels
-                       if (!(conf->flag&MPLP_NO_INDEL) && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
+                       if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
                                 for (i = 0; i < gplp.n; ++i)
                                         bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
                                 if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) {
@@ -866,11 +868,11 @@ int bam_mpileup(int argc, char *argv[])
         mplp.max_mq = 60;
         mplp.min_baseQ = 13;
         mplp.capQ_thres = 0;
-       mplp.max_depth = 250;
+       mplp.max_depth = 250; mplp.max_indel_depth = 250;
         mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
         mplp.min_frac = 0.002; mplp.min_support = 1;
         mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
-       while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:b:P:o:e:h:Im:F:EG:")) >= 0) {
+       while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:")) >= 0) {
                 switch (c) {
                 case 'f':
                         mplp.fai = fai_load(optarg);
@@ -900,6 +902,7 @@ int bam_mpileup(int argc, char *argv[])
                 case 'A': use_orphan = 1; break;
                 case 'F': mplp.min_frac = atof(optarg); break;
                 case 'm': mplp.min_support = atoi(optarg); break;
+               case 'L': mplp.max_indel_depth = atoi(optarg); break;
                 case 'G': {
                                 FILE *fp_rg;
                                 char buf[1024];
@@ -924,7 +927,8 @@ int bam_mpileup(int argc, char *argv[])
                 fprintf(stderr, "         -M INT      cap mapping quality at INT [%d]\n", mplp.max_mq);
                 fprintf(stderr, "         -Q INT      min base quality [%d]\n", mplp.min_baseQ);
                 fprintf(stderr, "         -q INT      filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
-               fprintf(stderr, "         -d INT      max per-sample depth [%d]\n", mplp.max_depth);
+               fprintf(stderr, "         -d INT      max per-BAM depth [%d]\n", mplp.max_depth);
+               fprintf(stderr, "         -L INT      max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth);
                 fprintf(stderr, "         -P STR      comma separated list of platforms for indels [all]\n");
                 fprintf(stderr, "         -o INT      Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ);
                 fprintf(stderr, "         -e INT      Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ);
diff --git a/bam_sort.c b/bam_sort.c

index 38f15d655c253dc1f7e8cd50dd39c178c448f584..7aeccff3e1e8e61507c4983ce535de884d6bbcae 100644 (file)
--- a/bam_sort.c
+++ b/bam_sort.c
@@ -298,14 +298,19 @@ KSORT_INIT(sort, bam1_p, bam1_lt)
  
  static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h, int is_stdout)
  {
-       char *name;
+       char *name, mode[3];
         int i;
         bamFile fp;
         ks_mergesort(sort, k, buf, 0);
         name = (char*)calloc(strlen(prefix) + 20, 1);
-       if (n >= 0) sprintf(name, "%s.%.4d.bam", prefix, n);
-       else sprintf(name, "%s.bam", prefix);
-       fp = is_stdout? bam_dopen(fileno(stdout), "w") : bam_open(name, "w");
+       if (n >= 0) {
+               sprintf(name, "%s.%.4d.bam", prefix, n);
+               strcpy(mode, "w1");
+       } else {
+               sprintf(name, "%s.bam", prefix);
+               strcpy(mode, "w");
+       }
+       fp = is_stdout? bam_dopen(fileno(stdout), mode) : bam_open(name, mode);
         if (fp == 0) {
                 fprintf(stderr, "[sort_blocks] fail to create file %s.\n", name);
                 free(name);
diff --git a/bamtk.c b/bamtk.c

index 1d1151973fb08efbbb6d642480712fbc6b3b1563..5309d5ece3040e1413d9c2617d6efd1f5e0dfd67 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -25,6 +25,7 @@ int main_import(int argc, char *argv[]);
  int main_reheader(int argc, char *argv[]);
  int main_cut_target(int argc, char *argv[]);
  int main_phase(int argc, char *argv[]);
+int main_cat(int argc, char *argv[]);
  
  int faidx_main(int argc, char *argv[]);
  int glf3_view_main(int argc, char *argv[]);
@@ -92,6 +93,7 @@ static int usage()
         fprintf(stderr, "         merge       merge sorted alignments\n");
         fprintf(stderr, "         rmdup       remove PCR duplicates\n");
         fprintf(stderr, "         reheader    replace BAM header\n");
+       fprintf(stderr, "         cat         concatenate BAMs\n");
         fprintf(stderr, "         targetcut   cut fosmid regions (for fosmid pool only)\n");
         fprintf(stderr, "         phase       phase heterozygotes\n");
         fprintf(stderr, "\n");
@@ -131,6 +133,7 @@ int main(int argc, char *argv[])
         else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);
         else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);
         else if (strcmp(argv[1], "reheader") == 0) return main_reheader(argc-1, argv+1);
+       else if (strcmp(argv[1], "cat") == 0) return main_cat(argc-1, argv+1);
         else if (strcmp(argv[1], "targetcut") == 0) return main_cut_target(argc-1, argv+1);
         else if (strcmp(argv[1], "phase") == 0) return main_phase(argc-1, argv+1);
  #if _CURSES_LIB != 0
diff --git a/bcftools/bcf.h b/bcftools/bcf.h

index f545a916879ee9f527c3e12b622926dfaf96971b..ba6dbe9b42ae033c6b3c179a730a48312cf927dd 100644 (file)
--- a/bcftools/bcf.h
+++ b/bcftools/bcf.h
@@ -146,6 +146,10 @@ extern "C" {
         int bcf_is_indel(const bcf1_t *b);
         bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list);
         int bcf_subsam(int n_smpl, int *list, bcf1_t *b);
+       // move GT to the first FORMAT field
+       int bcf_fix_gt(bcf1_t *b);
+       // update PL generated by old samtools
+       int bcf_fix_pl(bcf1_t *b);
  
         // string hash table
         void *bcf_build_refhash(bcf_hdr_t *h);
diff --git a/bcftools/bcf.tex b/bcftools/bcf.tex

index 6f2171fa56ee39c6d1baa23da8e18794764ddcb5..442fc2a0186ab03d66a055c404fea6cdcef632c1 100644 (file)
--- a/bcftools/bcf.tex
+++ b/bcftools/bcf.tex
@@ -33,13 +33,14 @@
  \end{center}
  
  \begin{center}
-\begin{tabular}{cll}
+\begin{tabular}{clp{9cm}}
  \hline
  \multicolumn{1}{l}{\bf Field} & \multicolumn{1}{l}{\bf Type} & \multicolumn{1}{l}{\bf Description} \\\hline
  {\tt DP} & {\tt uint16\_t[n]} & Read depth \\
  {\tt GL} & {\tt float[n*G]} & Log10 likelihood of data; $G=\frac{A(A+1)}{2}$, $A=\#\{alleles\}$\\
  {\tt GT} & {\tt uint8\_t[n]} & {\tt missing\char60\char60 7 | phased\char60\char60 6 | allele1\char60\char60 3 | allele2} \\
-{\tt \_GT} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic GT; the first int equals the max ploidy $P$} \\
+{\tt \_GT} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic GT; the first int equals the max ploidy $P$. If the highest bit is set,
+       the allele is not present (e.g. due to different ploidy between samples).} \\
  {\tt GQ} & {\tt uint8\_t[n]} & {Genotype quality}\\
  {\tt HQ} & {\tt uint8\_t[n*2]} & {Haplotype quality}\\
  {\tt \_HQ} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic HQ}\\
@@ -67,10 +68,10 @@
         BCF proposal).
  \item Predefined {\tt FORMAT} fields can be missing from VCF headers, but custom {\tt FORMAT} fields
         are required to be explicitly defined in the headers.
-\item A {\tt FORMAT} field with its name starting with `{\tt \_}' gives an alternative
-       binary representation of the corresponding VCF field. The alternative representation
-       is used when the default representation is unable to keep the genotype information,
-       for example, when the ploidy is over 2 or there are more than 8 alleles.
+\item A {\tt FORMAT} field with its name starting with `{\tt \_}' is specific to BCF only.
+       It gives an alternative binary representation of the corresponding VCF field, in case
+       the default representation is unable to keep the genotype information,
+       for example, when the ploidy is not 2 or there are more than 8 alleles.
  \end{itemize}
  
  \end{document}
diff --git a/bcftools/bcftools.1 b/bcftools/bcftools.1

index 6c7403bea6b8b6ebde3c77a8a1ef55ee890fcd59..236abe401be1cf77946b1e4e7ad2e4be09b752a0 100644 (file)
--- a/bcftools/bcftools.1
+++ b/bcftools/bcftools.1
@@ -1,4 +1,4 @@
-.TH bcftools 1 "2 October 2010" "bcftools" "Bioinformatics tools"
+.TH bcftools 1 "16 March 2011" "bcftools" "Bioinformatics tools"
  .SH NAME
  .PP
  bcftools - Utilities for the Binary Call Format (BCF) and VCF.
@@ -20,53 +20,55 @@ estimating site allele frequencies and allele frequency spectrums.
  .TP 10
  .B view
  .B bcftools view
-.RB [ \-cbuSAGgHvNQ ]
-.RB [ \-1
-.IR nGroup1 ]
+.RB [ \-AbFGNQSucgv ]
+.RB [ \-D
+.IR seqDict ]
  .RB [ \-l
-.IR listFile ]
+.IR listLoci ]
+.RB [ \-s
+.IR listSample ]
+.RB [ \-i
+.IR gapSNPratio ]
  .RB [ \-t
  .IR mutRate ]
  .RB [ \-p
  .IR varThres ]
  .RB [ \-P
  .IR prior ]
+.RB [ \-1
+.IR nGroup1 ]
+.RB [ \-U
+.IR nPerm ]
+.RB [ \-X
+.IR permThres ]
  .I in.bcf
  .RI [ region ]
  
  Convert between BCF and VCF, call variant candidates and estimate allele
  frequencies.
  
-.B OPTIONS:
  .RS
+.TP
+.B Input/Output Options:
+.TP 10
+.B -A
+Retain all possible alternate alleles at variant sites. By default, the view
+command discards unlikely alleles.
  .TP 10
  .B -b
  Output in the BCF format. The default is VCF.
  .TP
-.B -c
-Call variants.
+.BI -D \ FILE
+Sequence dictionary (list of chromosome names) for VCF->BCF conversion [null]
  .TP
-.B -v
-Output variant sites only (force -c)
-.TP
-.B -g
-Call per-sample genotypes at variant sites (force -c)
-.TP
-.B -u
-Uncompressed BCF output (force -b).
-.TP
-.B -S
-The input is VCF instead of BCF.
-.TP
-.B -A
-Retain all possible alternate alleles at variant sites. By default, this
-command discards unlikely alleles.
+.B -F
+Indicate PL is generated by r921 or before (ordering is different).
  .TP
  .B -G
  Suppress all individual genotype information.
  .TP
-.B -H
-Perform Hardy-Weiberg Equilibrium test. This will add computation time, sometimes considerably.
+.BI -l \ FILE
+List of sites at which information are outputted [all sites]
  .TP
  .B -N
  Skip sites where the REF field is not A/C/G/T
@@ -74,31 +76,63 @@ Skip sites where the REF field is not A/C/G/T
  .B -Q
  Output the QCALL likelihood format
  .TP
-.B -f
-Reference-free variant calling mode. In this mode, the prior will be
-folded; a variant is called iff the sample(s) contains at least two
-alleles; the QUAL field in the VCF/BCF output is changed accordingly.
+.BI -s \ FILE
+List of samples to use. In the output, the ordering of samples will be identical
+to the one in
+.IR FILE .
+[null]
  .TP
-.BI "-1 " INT
-Number of group-1 samples. This option is used for dividing input into
-two groups for comparing. A zero value disables this functionality. [0]
+.B -S
+The input is VCF instead of BCF.
  .TP
-.BI "-l " FILE
-List of sites at which information are outputted [all sites]
+.B -u
+Uncompressed BCF output (force -b).
  .TP
-.BI "-t " FLOAT
-Scaled muttion rate for variant calling [0.001]
+.B Consensus/Variant Calling Options:
+.TP 10
+.B -c
+Call variants.
+.TP
+.B -g
+Call per-sample genotypes at variant sites (force -c)
  .TP
-.BI "-p " FLOAT
+.BI -i \ FLOAT
+Ratio of INDEL-to-SNP mutation rate [0.15]
+.TP
+.BI -p \ FLOAT
  A site is considered to be a variant if P(ref|D)<FLOAT [0.5]
  .TP
-.BI "-P " STR
+.BI -P \ STR
  Prior or initial allele frequency spectrum. If STR can be
  .IR full ,
  .IR cond2 ,
  .I flat
  or the file consisting of error output from a previous variant calling
  run.
+.TP
+.BI -t \ FLOAT
+Scaled muttion rate for variant calling [0.001]
+.TP
+.B -v
+Output variant sites only (force -c)
+.TP
+.B Contrast Calling and Association Test Options:
+.TP
+.BI -1 \ INT
+Number of group-1 samples. This option is used for dividing the samples into
+two groups for contrast SNP calling or association test.
+When this option is in use, the following VCF INFO will be outputted:
+PC2, PCHI2 and QCHI2. [0]
+.TP
+.BI -U \ INT
+Number of permutations for association test (effective only with
+.BR -1 )
+[0]
+.TP
+.BI -X \ FLOAT
+Only perform permutations for P(chi^2)<FLOAT (effective only with
+.BR -U )
+[0.01]
  .RE
  
  .TP
@@ -118,3 +152,24 @@ Index sorted BCF for random access.
  Concatenate BCF files. The input files are required to be sorted and
  have identical samples appearing in the same order.
  .RE
+
+.SH BCFTOOLS SPECIFIC VCF TAGS
+
+.TS
+center box;
+cb | cb | cb
+l | l | l .
+Tag    Format  Description
+_
+AF1    double  Max-likelihood estimate of the site allele frequency (AF) of the first ALT allele
+CI95   double[2]       Equal-tail Bayesian credible interval of AF at the 95% level
+DP     int     Raw read depth (without quality filtering)
+DP4    int[4]  # high-quality reference forward bases, ref reverse, alternate for and alt rev bases
+FQ     int     Consensus quality. Positive: sample genotypes different; negative: otherwise
+MQ     int     Root-Mean-Square mapping quality of covering reads
+PC2    int[2]  Phred probability of AF in group1 samples being larger (,smaller) than in group2
+PCHI2  double  Posterior weighted chi^2 P-value between group1 and group2 samples
+PV4    double[4]       P-value for strand bias, baseQ bias, mapQ bias and tail distance bias
+QCHI2  int     Phred-scaled PCHI2
+RP     int     # permutations yielding a smaller PCHI2
+.TE
diff --git a/bcftools/bcfutils.c b/bcftools/bcfutils.c

index fc1c542c2d6a5142274ef157a10af31a99c88269..91cc2c23ccb33c66c944fe013df1e92a84867858 100644 (file)
--- a/bcftools/bcfutils.c
+++ b/bcftools/bcfutils.c
@@ -141,6 +141,33 @@ int bcf_fix_gt(bcf1_t *b)
         return 0;
  }
  
+int bcf_fix_pl(bcf1_t *b)
+{
+       int i;
+       uint32_t tmp;
+       uint8_t *PL, *swap;
+       bcf_ginfo_t *gi;
+       // pinpoint PL
+       tmp = bcf_str2int("PL", 2);
+       for (i = 0; i < b->n_gi; ++i)
+               if (b->gi[i].fmt == tmp) break;
+       if (i == b->n_gi) return 0;
+       // prepare
+       gi = b->gi + i;
+       PL = (uint8_t*)gi->data;
+       swap = alloca(gi->len);
+       // loop through individuals
+       for (i = 0; i < b->n_smpl; ++i) {
+               int k, l, x;
+               uint8_t *PLi = PL + i * gi->len;
+               memcpy(swap, PLi, gi->len);
+               for (k = x = 0; k < b->n_alleles; ++k)
+                       for (l = k; l < b->n_alleles; ++l)
+                               PLi[l*(l+1)/2 + k] = swap[x++];
+       }
+       return 0;
+}
+
  static void *locate_field(const bcf1_t *b, const char *fmt, int l)
  {
         int i;
@@ -188,6 +215,31 @@ int bcf_anno_max(bcf1_t *b)
         return 0;
  }
  
+// FIXME: only data are shuffled; the header is NOT
+int bcf_shuffle(bcf1_t *b, int seed)
+{
+       int i, j, *a;
+       if (seed > 0) srand48(seed);
+       a = malloc(b->n_smpl * sizeof(int));
+       for (i = 0; i < b->n_smpl; ++i) a[i] = i;
+       for (i = b->n_smpl; i > 1; --i) {
+               int tmp;
+               j = (int)(drand48() * i);
+               tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp;
+       }
+       for (j = 0; j < b->n_gi; ++j) {
+               bcf_ginfo_t *gi = b->gi + j;
+               uint8_t *swap, *data = (uint8_t*)gi->data;
+               swap = malloc(gi->len * b->n_smpl);
+               for (i = 0; i < b->n_smpl; ++i)
+                       memcpy(swap + gi->len * a[i], data + gi->len * i, gi->len);
+               free(gi->data);
+               gi->data = swap;
+       }
+       free(a);
+       return 0;
+}
+
  bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list)
  {
         int i, ret, j;
@@ -197,12 +249,15 @@ bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int
         kstring_t s;
         s.l = s.m = 0; s.s = 0;
         hash = kh_init(str2id);
-       for (i = 0; i < n; ++i)
-               k = kh_put(str2id, hash, samples[i], &ret);
-       for (i = j = 0; i < h0->n_smpl; ++i) {
-               if (kh_get(str2id, hash, h0->sns[i]) != kh_end(hash)) {
-                       list[j++] = i;
-                       kputs(h0->sns[i], &s); kputc('\0', &s);
+       for (i = 0; i < h0->n_smpl; ++i) {
+               k = kh_put(str2id, hash, h0->sns[i], &ret);
+               kh_val(hash, k) = i;
+       }
+       for (i = j = 0; i < n; ++i) {
+               k = kh_get(str2id, hash, samples[i]);
+               if (k != kh_end(hash)) {
+                       list[j++] = kh_val(hash, k);
+                       kputs(samples[i], &s); kputc('\0', &s);
                 }
         }
         if (j < n) fprintf(stderr, "<%s> %d samples in the list but not in BCF.", __func__, n - j);
@@ -217,13 +272,17 @@ bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int
         return h;
  }
  
-int bcf_subsam(int n_smpl, int *list, bcf1_t *b) // list MUST BE sorted
+int bcf_subsam(int n_smpl, int *list, bcf1_t *b)
  {
         int i, j;
         for (j = 0; j < b->n_gi; ++j) {
                 bcf_ginfo_t *gi = b->gi + j;
+               uint8_t *swap;
+               swap = malloc(gi->len * b->n_smpl);
                 for (i = 0; i < n_smpl; ++i)
-                       if (i != list[i]) memcpy((uint8_t*)gi->data + i * gi->len, (uint8_t*)gi->data + list[i] * gi->len, gi->len);
+                       memcpy(swap + i * gi->len, (uint8_t*)gi->data + list[i] * gi->len, gi->len);
+               free(gi->data);
+               gi->data = swap;
         }
         b->n_smpl = n_smpl;
         return 0;
diff --git a/bcftools/call1.c b/bcftools/call1.c

index 9c0b498c18cb46e2dd673f0527f7c02d2e76dc3d..a520e3cc966553c2acfbef98a0ddfc2e4c48b53f 100644 (file)
--- a/bcftools/call1.c
+++ b/bcftools/call1.c
@@ -6,6 +6,7 @@
  #include "bcf.h"
  #include "prob1.h"
  #include "kstring.h"
+#include "time.h"
  
  #include "khash.h"
  KHASH_SET_INIT_INT64(set64)
@@ -26,12 +27,13 @@ KSTREAM_INIT(gzFile, gzread, 16384)
  #define VC_ADJLD   4096
  #define VC_NO_INDEL 8192
  #define VC_ANNO_MAX 16384
+#define VC_FIX_PL   32768
  
  typedef struct {
-       int flag, prior_type, n1, n_sub, *sublist;
+       int flag, prior_type, n1, n_sub, *sublist, n_perm;
         char *fn_list, *prior_file, **subsam, *fn_dict;
         uint8_t *ploidy;
-       double theta, pref, indel_frac;
+       double theta, pref, indel_frac, min_perm_p;
  } viewconf_t;
  
  khash_t(set64) *bcf_load_pos(const char *fn, bcf_hdr_t *_h)
@@ -133,11 +135,11 @@ static void rm_info(bcf1_t *b, const char *key)
  static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag)
  {
         kstring_t s;
-       int is_var = (pr->p_ref < pref);
-       double r = is_var? pr->p_ref : pr->p_var, fq;
+       int has_I16, is_var = (pr->p_ref < pref);
+       double fq, r = is_var? pr->p_ref : pr->p_var;
         anno16_t a;
  
-       test16(b, &a);
+       has_I16 = test16(b, &a) >= 0? 1 : 0;
         rm_info(b, "I16=");
  
         memset(&s, 0, sizeof(kstring_t));
@@ -147,15 +149,24 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p
         if (b->info[0]) kputc(';', &s);
  //     ksprintf(&s, "AF1=%.4lg;AFE=%.4lg;CI95=%.4lg,%.4lg", 1.-pr->f_em, 1.-pr->f_exp, pr->cil, pr->cih);
         ksprintf(&s, "AF1=%.4g;CI95=%.4g,%.4g", 1.-pr->f_em, pr->cil, pr->cih);
-       ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
+       if (has_I16) ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
         fq = pr->p_ref_folded < 0.5? -4.343 * log(pr->p_ref_folded) : 4.343 * log(pr->p_var_folded);
         if (fq < -999) fq = -999;
         if (fq > 999) fq = 999;
         ksprintf(&s, ";FQ=%.3g", fq);
-       if (a.is_tested) {
-               if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%g,%g,%g,%g", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
-               ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]);
+       if (pr->cmp[0] >= 0.) {
+               int i, q[3], pq;
+               for (i = 1; i < 3; ++i) {
+                       double x = pr->cmp[i] + pr->cmp[0]/2.;
+                       q[i] = x == 0? 255 : (int)(-4.343 * log(x) + .499);
+                       if (q[i] > 255) q[i] = 255;
+               }
+               pq = (int)(-4.343 * log(pr->p_chi2) + .499);
+               if (pr->perm_rank >= 0) ksprintf(&s, ";PR=%d", pr->perm_rank);
+               ksprintf(&s, ";QCHI2=%d;PCHI2=%.3g;PC2=%d,%d", pq, q[1], q[2], pr->p_chi2);
+//             ksprintf(&s, ",%g,%g,%g", pr->cmp[0], pr->cmp[1], pr->cmp[2]);
         }
+       if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]);
         kputc('\0', &s);
         kputs(b->fmt, &s); kputc('\0', &s);
         free(b->str);
@@ -232,7 +243,7 @@ static void write_header(bcf_hdr_t *h)
         if (!strstr(str.s, "##INFO=<ID=MQ,"))
                 kputs("##INFO=<ID=MQ,Number=1,Type=Integer,Description=\"Root-mean-square mapping quality of covering reads\">\n", &str);
         if (!strstr(str.s, "##INFO=<ID=FQ,"))
-               kputs("##INFO=<ID=FQ,Number=1,Type=Float,Description=\"Phred probability that sample chromosomes are not all the same\">\n", &str);
+               kputs("##INFO=<ID=FQ,Number=1,Type=Float,Description=\"Phred probability of all samples being the same\">\n", &str);
         if (!strstr(str.s, "##INFO=<ID=AF1,"))
                 kputs("##INFO=<ID=AF1,Number=1,Type=Float,Description=\"Max-likelihood estimate of the site allele frequency of the first ALT allele\">\n", &str);
         if (!strstr(str.s, "##INFO=<ID=CI95,"))
@@ -241,7 +252,15 @@ static void write_header(bcf_hdr_t *h)
                 kputs("##INFO=<ID=PV4,Number=4,Type=Float,Description=\"P-values for strand bias, baseQ bias, mapQ bias and tail distance bias\">\n", &str);
      if (!strstr(str.s, "##INFO=<ID=INDEL,"))
          kputs("##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">\n", &str);
-    if (!strstr(str.s, "##INFO=<ID=GT,"))
+    if (!strstr(str.s, "##INFO=<ID=PC2,"))
+        kputs("##INFO=<ID=PC2,Number=2,Type=Integer,Description=\"Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.\">\n", &str);
+    if (!strstr(str.s, "##INFO=<ID=PCHI2,"))
+        kputs("##INFO=<ID=PCHI2,Number=1,Type=Float,Description=\"Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.\">\n", &str);
+    if (!strstr(str.s, "##INFO=<ID=QCHI2,"))
+        kputs("##INFO=<ID=QCHI2,Number=1,Type=Integer,Description=\"Phred scaled PCHI2.\">\n", &str);
+    if (!strstr(str.s, "##INFO=<ID=RP,"))
+        kputs("##INFO=<ID=RP,Number=1,Type=Integer,Description=\"# permutations yielding a smaller PCHI2.\">\n", &str);
+    if (!strstr(str.s, "##FORMAT=<ID=GT,"))
          kputs("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n", &str);
      if (!strstr(str.s, "##FORMAT=<ID=GQ,"))
          kputs("##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">\n", &str);
@@ -264,9 +283,10 @@ int bcfview(int argc, char *argv[])
         extern void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x);
         extern int bcf_fix_gt(bcf1_t *b);
         extern int bcf_anno_max(bcf1_t *b);
+       extern int bcf_shuffle(bcf1_t *b, int seed);
         bcf_t *bp, *bout = 0;
         bcf1_t *b, *blast;
-       int c;
+       int c, *seeds = 0;
         uint64_t n_processed = 0;
         viewconf_t vc;
         bcf_p1aux_t *p1 = 0;
@@ -277,12 +297,13 @@ int bcfview(int argc, char *argv[])
  
         tid = begin = end = -1;
         memset(&vc, 0, sizeof(viewconf_t));
-       vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.;
-       while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:IMs:D:")) >= 0) {
+       vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01;
+       while ((c = getopt(argc, argv, "FN1:l:cHAGvbSuP:t:p:QgLi:IMs:D:U:X:")) >= 0) {
                 switch (c) {
                 case '1': vc.n1 = atoi(optarg); break;
                 case 'l': vc.fn_list = strdup(optarg); break;
                 case 'D': vc.fn_dict = strdup(optarg); break;
+               case 'F': vc.flag |= VC_FIX_PL; break;
                 case 'N': vc.flag |= VC_ACGT_ONLY; break;
                 case 'G': vc.flag |= VC_NO_GENO; break;
                 case 'A': vc.flag |= VC_KEEPALT; break;
@@ -299,6 +320,8 @@ int bcfview(int argc, char *argv[])
                 case 'i': vc.indel_frac = atof(optarg); break;
                 case 'Q': vc.flag |= VC_QCALL; break;
                 case 'L': vc.flag |= VC_ADJLD; break;
+               case 'U': vc.n_perm = atoi(optarg); break;
+               case 'X': vc.min_perm_p = atof(optarg); break;
                 case 's': vc.subsam = read_samples(optarg, &vc.n_sub);
                         vc.ploidy = calloc(vc.n_sub + 1, 1);
                         for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1];
@@ -323,19 +346,21 @@ int bcfview(int argc, char *argv[])
                 fprintf(stderr, "         -S        input is VCF\n");
                 fprintf(stderr, "         -A        keep all possible alternate alleles at variant sites\n");
                 fprintf(stderr, "         -G        suppress all individual genotype information\n");
-               fprintf(stderr, "         -H        perform Hardy-Weinberg test (slower)\n");
                 fprintf(stderr, "         -N        skip sites where REF is not A/C/G/T\n");
                 fprintf(stderr, "         -Q        output the QCALL likelihood format\n");
                 fprintf(stderr, "         -L        calculate LD for adjacent sites\n");
                 fprintf(stderr, "         -I        skip indels\n");
+               fprintf(stderr, "         -F        PL generated by r921 or before\n");
                 fprintf(stderr, "         -D FILE   sequence dictionary for VCF->BCF conversion [null]\n");
-               fprintf(stderr, "         -1 INT    number of group-1 samples [0]\n");
                 fprintf(stderr, "         -l FILE   list of sites to output [all sites]\n");
                 fprintf(stderr, "         -t FLOAT  scaled substitution mutation rate [%.4g]\n", vc.theta);
                 fprintf(stderr, "         -i FLOAT  indel-to-substitution ratio [%.4g]\n", vc.indel_frac);
                 fprintf(stderr, "         -p FLOAT  variant if P(ref|D)<FLOAT [%.3g]\n", vc.pref);
                 fprintf(stderr, "         -P STR    type of prior: full, cond2, flat [full]\n");
                 fprintf(stderr, "         -s FILE   list of samples to use [all samples]\n");
+               fprintf(stderr, "         -1 INT    number of group-1 samples [0]\n");
+               fprintf(stderr, "         -U INT    number of permutations for association testing (effective with -1) [0]\n");
+               fprintf(stderr, "         -X FLOAT  only perform permutations for P(chi^2)<FLOAT [%g]\n", vc.min_perm_p);
                 fprintf(stderr, "\n");
                 return 1;
         }
@@ -344,6 +369,12 @@ int bcfview(int argc, char *argv[])
                 fprintf(stderr, "[%s] For VCF->BCF conversion please specify the sequence dictionary with -D\n", __func__);
                 return 1;
         }
+       if (vc.n1 <= 0) vc.n_perm = 0; // TODO: give a warning here!
+       if (vc.n_perm > 0) {
+               seeds = malloc(vc.n_perm * sizeof(int));
+               srand48(time(0));
+               for (c = 0; c < vc.n_perm; ++c) seeds[c] = lrand48();
+       }
         b = calloc(1, sizeof(bcf1_t));
         blast = calloc(1, sizeof(bcf1_t));
         strcpy(moder, "r");
@@ -399,6 +430,7 @@ int bcfview(int argc, char *argv[])
         while (vcf_read(bp, hin, b) > 0) {
                 int is_indel;
                 if (vc.n_sub) bcf_subsam(vc.n_sub, vc.sublist, b);
+               if (vc.flag & VC_FIX_PL) bcf_fix_pl(b);
                 is_indel = bcf_is_indel(b);
                 if ((vc.flag & VC_NO_INDEL) && is_indel) continue;
                 if ((vc.flag & VC_ACGT_ONLY) && !is_indel) {
@@ -434,6 +466,16 @@ int bcfview(int argc, char *argv[])
                                 bcf_p1_dump_afs(p1);
                         }
                         if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue;
+                       if (vc.n_perm && vc.n1 > 0 && pr.p_chi2 < vc.min_perm_p) { // permutation test
+                               bcf_p1rst_t r;
+                               int i, n = 0;
+                               for (i = 0; i < vc.n_perm; ++i) {
+                                       bcf_shuffle(b, seeds[i]);
+                                       bcf_p1_cal(b, p1, &r);
+                                       if (pr.p_chi2 >= r.p_chi2) ++n;
+                               }
+                               pr.perm_rank = n;
+                       }
                         update_bcf1(hout->n_smpl, b, p1, &pr, vc.pref, vc.flag);
                 }
                 if (vc.flag & VC_ADJLD) { // compute LD
@@ -471,6 +513,7 @@ int bcfview(int argc, char *argv[])
                 for (i = 0; i < vc.n_sub; ++i) free(vc.subsam[i]);
                 free(vc.subsam); free(vc.sublist);
         }
+       if (seeds) free(seeds);
         if (p1) bcf_p1_destroy(p1);
         return 0;
  }
diff --git a/bcftools/prob1.c b/bcftools/prob1.c

index 4804e6e24c3c6787f2ca3a18fb6e83687f379ed4..503a998457469cd2081ea3bf90b6aea3a96fea02 100644 (file)
--- a/bcftools/prob1.c
+++ b/bcftools/prob1.c
@@ -39,6 +39,7 @@ struct __bcf_p1aux_t {
         double *phi, *phi_indel;
         double *z, *zswap; // aux for afs
         double *z1, *z2, *phi1, *phi2; // only calculated when n1 is set
+       double **hg; // hypergeometric distribution
         double t, t1, t2;
         double *afs, *afs1; // afs: accumulative AFS; afs1: site posterior distribution
         const uint8_t *PL; // point to PL
@@ -173,6 +174,11 @@ int bcf_p1_set_n1(bcf_p1aux_t *b, int n1)
  void bcf_p1_destroy(bcf_p1aux_t *ma)
  {
         if (ma) {
+               int k;
+               if (ma->hg && ma->n1 > 0) {
+                       for (k = 0; k <= 2*ma->n1; ++k) free(ma->hg[k]);
+                       free(ma->hg);
+               }
                 free(ma->ploidy); free(ma->q2p); free(ma->pdg);
                 free(ma->phi); free(ma->phi_indel); free(ma->phi1); free(ma->phi2);
                 free(ma->z); free(ma->zswap); free(ma->z1); free(ma->z2);
@@ -231,7 +237,7 @@ int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
         }
         for (i = 0, sum = 0.; i < 3; ++i)
                 sum += (g[i] = pdg[i] * f3[i]);
-       for (i = 0, max = -1., max_i = 0; i <= ploidy; ++i) {
+       for (i = 0, max = -1., max_i = 0; i < 3; ++i) {
                 g[i] /= sum;
                 if (g[i] > max) max = g[i], max_i = i;
         }
@@ -258,24 +264,22 @@ static void mc_cal_y_core(bcf_p1aux_t *ma, int beg)
         last_min = last_max = 0;
         ma->t = 0.;
         if (ma->M == ma->n * 2) {
+               int M = 0;
                 for (_j = beg; _j < ma->n; ++_j) {
-                       int k, j = _j - beg, _min = last_min, _max = last_max;
+                       int k, j = _j - beg, _min = last_min, _max = last_max, M0;
                         double p[3], sum;
+                       M0 = M; M += 2;
                         pdg = ma->pdg + _j * 3;
                         p[0] = pdg[0]; p[1] = 2. * pdg[1]; p[2] = pdg[2];
                         for (; _min < _max && z[0][_min] < TINY; ++_min) z[0][_min] = z[1][_min] = 0.;
                         for (; _max > _min && z[0][_max] < TINY; --_max) z[0][_max] = z[1][_max] = 0.;
                         _max += 2;
-                       if (_min == 0) 
-                               k = 0, z[1][k] = (2*j+2-k)*(2*j-k+1) * p[0] * z[0][k];
-                       if (_min <= 1)
-                               k = 1, z[1][k] = (2*j+2-k)*(2*j-k+1) * p[0] * z[0][k] + k*(2*j+2-k) * p[1] * z[0][k-1];
+                       if (_min == 0) k = 0, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k];
+                       if (_min <= 1) k = 1, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1];
                         for (k = _min < 2? 2 : _min; k <= _max; ++k)
-                               z[1][k] = (2*j+2-k)*(2*j-k+1) * p[0] * z[0][k]
-                                       + k*(2*j+2-k) * p[1] * z[0][k-1]
-                                       + k*(k-1)* p[2] * z[0][k-2];
+                               z[1][k] = (M0-k+1)*(M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1] + k*(k-1)* p[2] * z[0][k-2];
                         for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
-                       ma->t += log(sum / ((2. * j + 2) * (2. * j + 1)));
+                       ma->t += log(sum / (M * (M - 1.)));
                         for (k = _min; k <= _max; ++k) z[1][k] /= sum;
                         if (_min >= 1) z[1][_min-1] = 0.;
                         if (_min >= 2) z[1][_min-2] = 0.;
@@ -287,6 +291,8 @@ static void mc_cal_y_core(bcf_p1aux_t *ma, int beg)
                         tmp = z[0]; z[0] = z[1]; z[1] = tmp;
                         last_min = _min; last_max = _max;
                 }
+               //for (_j = 0; _j < last_min; ++_j) z[0][_j] = 0.; // TODO: are these necessary?
+               //for (_j = last_max + 1; _j < ma->M; ++_j) z[0][_j] = 0.;
         } else { // this block is very similar to the block above; these two might be merged in future
                 int j, M = 0;
                 for (j = 0; j < ma->n; ++j) {
@@ -347,26 +353,104 @@ static void mc_cal_y(bcf_p1aux_t *ma)
         } else mc_cal_y_core(ma, 0);
  }
  
-static void contrast(bcf_p1aux_t *ma, double pc[4]) // mc_cal_y() must be called before hand
+#define CONTRAST_TINY 1e-30
+
+extern double kf_gammaq(double s, double z); // incomplete gamma function for chi^2 test
+
+static inline double chi2_test(int a, int b, int c, int d)
+{
+       double x, z;
+       x = (double)(a+b) * (c+d) * (b+d) * (a+c);
+       if (x == 0.) return 1;
+       z = a * d - b * c;
+       return kf_gammaq(.5, .5 * z * z * (a+b+c+d) / x);
+}
+
+// chi2=(a+b+c+d)(ad-bc)^2/[(a+b)(c+d)(a+c)(b+d)]
+static inline double contrast2_aux(const bcf_p1aux_t *p1, double sum, int n1, int n2, int k1, int k2, double x[3])
+{
+       double p = p1->phi[k1+k2] * p1->z1[k1] * p1->z2[k2] / sum * p1->hg[k1][k2];
+       if (p < CONTRAST_TINY) return -1;
+       if (.5*k1/n1 < .5*k2/n2) x[1] += p;
+       else if (.5*k1/n1 > .5*k2/n2) x[2] += p;
+       else x[0] += p;
+       return p * chi2_test(k1, k2, (n1<<1) - k1, (n2<<1) - k2);
+}
+
+static double contrast2(bcf_p1aux_t *p1, double ret[3])
  {
-       int k, n1 = ma->n1, n2 = ma->n - ma->n1;
-       long double sum1, sum2;
-       pc[0] = pc[1] = pc[2] = pc[3] = -1.;
-       if (n1 <= 0 || n2 <= 0) return;
-       for (k = 0, sum1 = 0.; k <= 2*n1; ++k) sum1 += ma->phi1[k] * ma->z1[k];
-       for (k = 0, sum2 = 0.; k <= 2*n2; ++k) sum2 += ma->phi2[k] * ma->z2[k];
-       pc[2] = ma->phi1[2*n1] * ma->z1[2*n1] / sum1;
-       pc[3] = ma->phi2[2*n2] * ma->z2[2*n2] / sum2;
-       for (k = 2; k < 4; ++k) {
-               pc[k] = pc[k] > .5? -(-4.343 * log(1. - pc[k] + TINY) + .499) : -4.343 * log(pc[k] + TINY) + .499;
-               pc[k] = (int)pc[k];
-               if (pc[k] > 99) pc[k] = 99;
-               if (pc[k] < -99) pc[k] = -99;
+       int k, k1, k2, k10, k20, n1, n2;
+       double sum;
+       // get n1 and n2
+       n1 = p1->n1; n2 = p1->n - p1->n1;
+       if (n1 <= 0 || n2 <= 0) return 0.;
+       if (p1->hg == 0) { // initialize the hypergeometric distribution
+               /* NB: the hg matrix may take a lot of memory when there are many samples. There is a way
+                  to avoid precomputing this matrix, but it is slower and quite intricate. The following
+                  computation in this block can be accelerated with a similar strategy, but perhaps this
+                  is not a serious concern for now. */
+               double tmp = lgamma(2*(n1+n2)+1) - (lgamma(2*n1+1) + lgamma(2*n2+1));
+               p1->hg = calloc(2*n1+1, sizeof(void*));
+               for (k1 = 0; k1 <= 2*n1; ++k1) {
+                       p1->hg[k1] = calloc(2*n2+1, sizeof(double));
+                       for (k2 = 0; k2 <= 2*n2; ++k2)
+                               p1->hg[k1][k2] = exp(lgamma(k1+k2+1) + lgamma(p1->M-k1-k2+1) - (lgamma(k1+1) + lgamma(k2+1) + lgamma(2*n1-k1+1) + lgamma(2*n2-k2+1) + tmp));
+               }
+       }
+       { // compute sum1 and sum2
+               long double suml = 0;
+               for (k = 0; k <= p1->M; ++k) suml += p1->phi[k] * p1->z[k];
+               sum = suml;
+       }
+       { // get the mean k1 and k2
+               double max;
+               int max_k;
+               for (k = 0, max = 0, max_k = -1; k <= 2*n1; ++k) {
+                       double x = p1->phi1[k] * p1->z1[k];
+                       if (x > max) max = x, max_k = k;
+               }
+               k10 = max_k;
+               for (k = 0, max = 0, max_k = -1; k <= 2*n2; ++k) {
+                       double x = p1->phi2[k] * p1->z2[k];
+                       if (x > max) max = x, max_k = k;
+               }
+               k20 = max_k;
+       }
+       { // We can do the following with one nested loop, but that is an O(N^2) thing. The following code block is much faster for large N.
+               double x[3], y;
+               long double z = 0.;
+               x[0] = x[1] = x[2] = 0;
+               for (k1 = k10; k1 >= 0; --k1) {
+                       for (k2 = k20; k2 >= 0; --k2) {
+                               if ((y = contrast2_aux(p1, sum, n1, n2, k1, k2, x)) < 0) break;
+                               else z += y;
+                       }
+                       for (k2 = k20 + 1; k2 <= 2*n2; ++k2) {
+                               if ((y = contrast2_aux(p1, sum, n1, n2, k1, k2, x)) < 0) break;
+                               else z += y;
+                       }
+               }
+               ret[0] = x[0]; ret[1] = x[1]; ret[2] = x[2];
+               x[0] = x[1] = x[2] = 0;
+               for (k1 = k10 + 1; k1 <= 2*n1; ++k1) {
+                       for (k2 = k20; k2 >= 0; --k2) {
+                               if ((y = contrast2_aux(p1, sum, n1, n2, k1, k2, x)) < 0) break;
+                               else z += y;
+                       }
+                       for (k2 = k20 + 1; k2 <= 2*n2; ++k2) {
+                               if ((y = contrast2_aux(p1, sum, n1, n2, k1, k2, x)) < 0) break;
+                               else z += y;
+                       }
+               }
+               ret[0] += x[0]; ret[1] += x[1]; ret[2] += x[2];
+               if (ret[0] + ret[1] + ret[2] < 0.99) { // in case of bad things happened
+                       ret[0] = ret[1] = ret[2] = 0;
+                       for (k1 = 0, z = 0.; k1 <= 2*n1; ++k1)
+                               for (k2 = 0; k2 <= 2*n2; ++k2)
+                                       if ((y = contrast2_aux(p1, sum, n1, n2, k1, k2, ret)) >= 0) z += y;
+               }
+               return (double)z;
         }
-       pc[0] = ma->phi2[2*n2] * ma->z2[2*n2] / sum2 * (1. - ma->phi1[2*n1] * ma->z1[2*n1] / sum1);
-       pc[1] = ma->phi1[2*n1] * ma->z1[2*n1] / sum1 * (1. - ma->phi2[2*n2] * ma->z2[2*n2] / sum2);
-       pc[0] = pc[0] == 1.? 99 : (int)(-4.343 * log(1. - pc[0]) + .499);
-       pc[1] = pc[1] == 1.? 99 : (int)(-4.343 * log(1. - pc[1]) + .499);
  }
  
  static double mc_cal_afs(bcf_p1aux_t *ma, double *p_ref_folded, double *p_var_folded)
@@ -403,6 +487,7 @@ int bcf_p1_cal(const bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst)
         int i, k;
         long double sum = 0.;
         ma->is_indel = bcf_is_indel(b);
+       rst->perm_rank = -1;
         // set PL and PL_len
         for (i = 0; i < b->n_gi; ++i) {
                 if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
@@ -450,7 +535,9 @@ int bcf_p1_cal(const bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst)
                 rst->cil = (double)(ma->M - h) / ma->M; rst->cih = (double)(ma->M - l) / ma->M;
         }
         rst->g[0] = rst->g[1] = rst->g[2] = -1.;
-       contrast(ma, rst->pc);
+       rst->cmp[0] = rst->cmp[1] = rst->cmp[2] = rst->p_chi2 = -1.0;
+       if (rst->p_var > 0.1) // skip contrast2() if the locus is a strong non-variant
+               rst->p_chi2 = contrast2(ma, rst->cmp);
         return 0;
  }
  
diff --git a/bcftools/prob1.h b/bcftools/prob1.h

index eb4dc9216c27358712c98ec6444f5ea249ad6242..3f89dda5fafb59278e8e60cadcd82fe46fb0eaf0 100644 (file)
--- a/bcftools/prob1.h
+++ b/bcftools/prob1.h
@@ -7,10 +7,10 @@ struct __bcf_p1aux_t;
  typedef struct __bcf_p1aux_t bcf_p1aux_t;
  
  typedef struct {
-       int rank0;
+       int rank0, perm_rank; // NB: perm_rank is always set to -1 by bcf_p1_cal()
         double f_em, f_exp, f_flat, p_ref_folded, p_ref, p_var_folded, p_var;
         double cil, cih;
-       double pc[4];
+       double cmp[3], p_chi2; // used by contrast2()
         double g[3];
  } bcf_p1rst_t;
  
diff --git a/bgzf.c b/bgzf.c

index 66d6b024f5441f694faf1d875febe9fe8e3c248c..db970c83f621bac471457bc58821e69e186e03ee 100644 (file)
--- a/bgzf.c
+++ b/bgzf.c
@@ -148,7 +148,7 @@ open_read(int fd)
  
  static
  BGZF*
-open_write(int fd, bool is_uncompressed)
+open_write(int fd, int compress_level) // compress_level==-1 for the default level
  {
      FILE* file = fdopen(fd, "w");
      BGZF* fp;
@@ -156,7 +156,9 @@ open_write(int fd, bool is_uncompressed)
         fp = malloc(sizeof(BGZF));
      fp->file_descriptor = fd;
      fp->open_mode = 'w';
-    fp->owned_file = 0; fp->is_uncompressed = is_uncompressed;
+    fp->owned_file = 0;
+       fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
+       if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
  #ifdef _USE_KNETFILE
      fp->x.fpw = file;
  #else
@@ -195,13 +197,20 @@ bgzf_open(const char* __restrict path, const char* __restrict mode)
          fp = open_read(fd);
  #endif
      } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
-               int fd, oflag = O_WRONLY | O_CREAT | O_TRUNC;
+               int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
  #ifdef _WIN32
                 oflag |= O_BINARY;
  #endif
                 fd = open(path, oflag, 0666);
                 if (fd == -1) return 0;
-        fp = open_write(fd, strchr(mode, 'u')? 1 : 0);
+               { // set compress_level
+                       int i;
+                       for (i = 0; mode[i]; ++i)
+                               if (mode[i] >= '0' && mode[i] <= '9') break;
+                       if (mode[i]) compress_level = (int)mode[i] - '0';
+                       if (strchr(mode, 'u')) compress_level = 0;
+               }
+        fp = open_write(fd, compress_level);
      }
      if (fp != NULL) fp->owned_file = 1;
      return fp;
@@ -214,7 +223,12 @@ bgzf_fdopen(int fd, const char * __restrict mode)
      if (mode[0] == 'r' || mode[0] == 'R') {
          return open_read(fd);
      } else if (mode[0] == 'w' || mode[0] == 'W') {
-        return open_write(fd, strstr(mode, "u")? 1 : 0);
+               int i, compress_level = -1;
+               for (i = 0; mode[i]; ++i)
+                       if (mode[i] >= '0' && mode[i] <= '9') break;
+               if (mode[i]) compress_level = (int)mode[i] - '0';
+               if (strchr(mode, 'u')) compress_level = 0;
+        return open_write(fd, compress_level);
      } else {
          return NULL;
      }
@@ -254,7 +268,6 @@ deflate_block(BGZF* fp, int block_length)
      int input_length = block_length;
      int compressed_length = 0;
      while (1) {
-               int compress_level = fp->is_uncompressed? 0 : Z_DEFAULT_COMPRESSION;
          z_stream zs;
          zs.zalloc = NULL;
          zs.zfree = NULL;
@@ -263,7 +276,7 @@ deflate_block(BGZF* fp, int block_length)
          zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
          zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
  
-        int status = deflateInit2(&zs, compress_level, Z_DEFLATED,
+        int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
                                    GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
          if (status != Z_OK) {
              report_error(fp, "deflate init failed");
@@ -330,6 +343,7 @@ inflate_block(BGZF* fp, int block_length)
      // Inflate the block in fp->compressed_block into fp->uncompressed_block
  
      z_stream zs;
+       int status;
      zs.zalloc = NULL;
      zs.zfree = NULL;
      zs.next_in = fp->compressed_block + 18;
@@ -337,7 +351,7 @@ inflate_block(BGZF* fp, int block_length)
      zs.next_out = fp->uncompressed_block;
      zs.avail_out = fp->uncompressed_block_size;
  
-    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
+    status = inflateInit2(&zs, GZIP_WINDOW_BITS);
      if (status != Z_OK) {
          report_error(fp, "inflate init failed");
          return -1;
@@ -431,7 +445,7 @@ int
  bgzf_read_block(BGZF* fp)
  {
      bgzf_byte_t header[BLOCK_HEADER_LENGTH];
-       int count, size = 0;
+       int count, size = 0, block_length, remaining;
  #ifdef _USE_KNETFILE
      int64_t block_address = knet_tell(fp->x.fpr);
         if (load_block_from_cache(fp, block_address)) return 0;
@@ -454,10 +468,10 @@ bgzf_read_block(BGZF* fp)
          report_error(fp, "invalid block header");
          return -1;
      }
-    int block_length = unpackInt16((uint8_t*)&header[16]) + 1;
+    block_length = unpackInt16((uint8_t*)&header[16]) + 1;
      bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
      memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
-    int remaining = block_length - BLOCK_HEADER_LENGTH;
+    remaining = block_length - BLOCK_HEADER_LENGTH;
  #ifdef _USE_KNETFILE
      count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
  #else
@@ -494,7 +508,8 @@ bgzf_read(BGZF* fp, void* data, int length)
      int bytes_read = 0;
      bgzf_byte_t* output = data;
      while (bytes_read < length) {
-        int available = fp->block_length - fp->block_offset;
+        int copy_length, available = fp->block_length - fp->block_offset;
+               bgzf_byte_t *buffer;
          if (available <= 0) {
              if (bgzf_read_block(fp) != 0) {
                  return -1;
@@ -504,8 +519,8 @@ bgzf_read(BGZF* fp, void* data, int length)
                  break;
              }
          }
-        int copy_length = bgzf_min(length-bytes_read, available);
-        bgzf_byte_t* buffer = fp->uncompressed_block;
+        copy_length = bgzf_min(length-bytes_read, available);
+        buffer = fp->uncompressed_block;
          memcpy(output, buffer + fp->block_offset, copy_length);
          fp->block_offset += copy_length;
          output += copy_length;
@@ -552,6 +567,8 @@ int bgzf_flush_try(BGZF *fp, int size)
  
  int bgzf_write(BGZF* fp, const void* data, int length)
  {
+       const bgzf_byte_t *input = data;
+       int block_length, bytes_written;
      if (fp->open_mode != 'w') {
          report_error(fp, "file not open for writing");
          return -1;
@@ -560,9 +577,9 @@ int bgzf_write(BGZF* fp, const void* data, int length)
      if (fp->uncompressed_block == NULL)
          fp->uncompressed_block = malloc(fp->uncompressed_block_size);
  
-    const bgzf_byte_t* input = data;
-    int block_length = fp->uncompressed_block_size;
-    int bytes_written = 0;
+    input = data;
+    block_length = fp->uncompressed_block_size;
+    bytes_written = 0;
      while (bytes_written < length) {
          int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
          bgzf_byte_t* buffer = fp->uncompressed_block;
diff --git a/bgzf.h b/bgzf.h

index 099ae9a6da1785ae132be55932a2e5c930ba49cc..9dc7b5fdb600087c5b4c44ff202c05f914626a23 100644 (file)
--- a/bgzf.h
+++ b/bgzf.h
@@ -26,7 +26,6 @@
  
  #include <stdint.h>
  #include <stdio.h>
-#include <stdbool.h>
  #include <zlib.h>
  #ifdef _USE_KNETFILE
  #include "knetfile.h"
@@ -37,7 +36,7 @@
  typedef struct {
      int file_descriptor;
      char open_mode;  // 'r' or 'w'
-    bool owned_file, is_uncompressed;
+    int16_t owned_file, compress_level;
  #ifdef _USE_KNETFILE
         union {
                 knetFile *fpr;
diff --git a/sam.c b/sam.c

index ecdee02dddb98a32d47a59e3154179356acecadf..e195b0f13e93d018d05f0784fc76c437e2eec8cd 100644 (file)
--- a/sam.c
+++ b/sam.c
@@ -40,9 +40,9 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
  {
         samfile_t *fp;
         fp = (samfile_t*)calloc(1, sizeof(samfile_t));
-       if (mode[0] == 'r') { // read
+       if (strchr(mode, 'r')) { // read
                 fp->type |= TYPE_READ;
-               if (mode[1] == 'b') { // binary
+               if (strchr(mode, 'b')) { // binary
                         fp->type |= TYPE_BAM;
                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
                         if (fp->x.bam == 0) goto open_err_ret;
@@ -63,11 +63,15 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
                                         fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
                         } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
                 }
-       } else if (mode[0] == 'w') { // write
+       } else if (strchr(mode, 'w')) { // write
                 fp->header = bam_header_dup((const bam_header_t*)aux);
-               if (mode[1] == 'b') { // binary
+               if (strchr(mode, 'b')) { // binary
                         char bmode[3];
-                       bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0;
+                       int i, compress_level = -1;
+                       for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break;
+                       if (mode[i]) compress_level = mode[i] - '0';
+                       if (strchr(mode, 'u')) compress_level = 0;
+                       bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0;
                         fp->type |= TYPE_BAM;
                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
                         if (fp->x.bam == 0) goto open_err_ret;
@@ -76,11 +80,11 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
                         // open file
                         fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
                         if (fp->x.tamr == 0) goto open_err_ret;
-                       if (strstr(mode, "X")) fp->type |= BAM_OFSTR<<2;
-                       else if (strstr(mode, "x")) fp->type |= BAM_OFHEX<<2;
+                       if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2;
+                       else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2;
                         else fp->type |= BAM_OFDEC<<2;
                         // write header
-                       if (strstr(mode, "h")) {
+                       if (strchr(mode, 'h')) {
                                 int i;
                                 bam_header_t *alt;
                                 // parse the header text 
diff --git a/sam_view.c b/sam_view.c

index eb69449f9e44d643ef43f31638fd8534c454c940..170bd843608b99aec35161141ee6afaad848db5e 100644 (file)
--- a/sam_view.c
+++ b/sam_view.c
@@ -82,7 +82,7 @@ static int usage(int is_long_help);
  
  int main_samview(int argc, char *argv[])
  {
-       int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0, slx2sngr = 0, is_count = 0;
+       int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, compress_level = -1, is_bamout = 0, slx2sngr = 0, is_count = 0;
         int of_type = BAM_OFDEC, is_long_help = 0;
         int count = 0;
         samfile_t *in = 0, *out = 0;
@@ -90,7 +90,7 @@ int main_samview(int argc, char *argv[])
  
         /* parse command-line options */
         strcpy(in_mode, "r"); strcpy(out_mode, "w");
-       while ((c = getopt(argc, argv, "Sbct:hHo:q:f:F:ul:r:xX?T:CR:")) >= 0) {
+       while ((c = getopt(argc, argv, "Sbct:h1Ho:q:f:F:ul:r:xX?T:CR:")) >= 0) {
                 switch (c) {
                 case 'c': is_count = 1; break;
                 case 'C': slx2sngr = 1; break;
@@ -103,7 +103,8 @@ int main_samview(int argc, char *argv[])
                 case 'f': g_flag_on = strtol(optarg, 0, 0); break;
                 case 'F': g_flag_off = strtol(optarg, 0, 0); break;
                 case 'q': g_min_mapQ = atoi(optarg); break;
-               case 'u': is_uncompressed = 1; break;
+               case 'u': compress_level = 0; break;
+               case '1': compress_level = 1; break;
                 case 'l': g_library = strdup(optarg); break;
                 case 'r': g_rg = strdup(optarg); break;
                 case 'R': fn_rg = strdup(optarg); break;
@@ -114,7 +115,7 @@ int main_samview(int argc, char *argv[])
                 default: return usage(is_long_help);
                 }
         }
-       if (is_uncompressed) is_bamout = 1;
+       if (compress_level >= 0) is_bamout = 1;
         if (is_header_only) is_header = 1;
         if (is_bamout) strcat(out_mode, "b");
         else {
@@ -123,7 +124,11 @@ int main_samview(int argc, char *argv[])
         }
         if (is_bamin) strcat(in_mode, "b");
         if (is_header) strcat(out_mode, "h");
-       if (is_uncompressed) strcat(out_mode, "u");
+       if (compress_level >= 0) {
+               char tmp[2];
+               tmp[0] = compress_level + '0'; tmp[1] = '\0';
+               strcat(out_mode, tmp);
+       }
         if (argc == optind) return usage(is_long_help); // potential memory leak...
  
         // read the list of read groups
@@ -231,6 +236,7 @@ static int usage(int is_long_help)
         fprintf(stderr, "         -H       print header only (no alignments)\n");
         fprintf(stderr, "         -S       input is SAM\n");
         fprintf(stderr, "         -u       uncompressed BAM output (force -b)\n");
+       fprintf(stderr, "         -1       fast compression (force -b)\n");
         fprintf(stderr, "         -x       output FLAG in HEX (samtools-C specific)\n");
         fprintf(stderr, "         -X       output FLAG in string (samtools-C specific)\n");
         fprintf(stderr, "         -c       print only the count of matching records\n");
diff --git a/samtools.1 b/samtools.1

index e0c77439751765b90aeb3c4ef50b8f4cb7d4a096..483467983b5778bbff4a5bee0a8542b893aca75a 100644 (file)
--- a/samtools.1
+++ b/samtools.1
@@ -1,4 +1,4 @@
-.TH samtools 1 "1 March 2011" "samtools-0.1.13" "Bioinformatics tools"
+.TH samtools 1 "16 March 2011" "samtools-0.1.14" "Bioinformatics tools"
  .SH NAME
  .PP
  samtools - Utilities for the Sequence Alignment/Map (SAM) format
@@ -145,7 +145,10 @@ identifiers are absent, each input file is regarded as one sample.
  
  .B OPTIONS:
  .RS
-.TP 8
+.TP 10
+.B -A
+Do not skip anomalous read pairs in variant calling.
+.TP
  .B -B
  Disable probabilistic realignment for the computation of base alignment
  quality (BAQ). BAQ is the Phred-scaled probability of a read base being
@@ -159,6 +162,14 @@ being generated from the mapped position, the new mapping quality is
  about sqrt((INT-q)/INT)*INT. A zero value disables this
  functionality; if enabled, the recommended value for BWA is 50. [0]
  .TP
+.BI -d \ INT
+At a position, read maximally
+.I INT
+reads per input BAM. [250]
+.TP
+.B -D
+Output per-sample read depth
+.TP
  .BI -e \ INT
  Phred-scaled gap extension sequencing error probability. Reducing
  .I INT
@@ -184,9 +195,17 @@ is modeled as
  .IR INT * s / l .
  [100]
  .TP
+.B -I
+Do not perform INDEL calling
+.TP
  .BI -l \ FILE
  File containing a list of sites where pileup or BCF is outputted [null]
  .TP
+.BI -L \ INT
+Skip INDEL calling if the average per-sample depth is above
+.IR INT .
+[250]
+.TP
  .BI -o \ INT
  Phred-scaled gap open sequencing error probability. Reducing
  .I INT
@@ -210,6 +229,9 @@ Only generate pileup in region
  .I STR
  [all sites]
  .TP
+.B -S
+Output per-sample Phred-scaled strand bias P-value
+.TP
  .B -u
  Similar to
  .B -g
author	Heng Li <lh3@live.co.uk>
	Fri, 18 Mar 2011 20:33:51 +0000 (20:33 +0000)
committer	Heng Li <lh3@live.co.uk>
	Fri, 18 Mar 2011 20:33:51 +0000 (20:33 +0000)
Makefile		patch \| blob \| history
NEWS		patch \| blob \| history
bam.h		patch \| blob \| history
bam_plcmd.c		patch \| blob \| history
bam_sort.c		patch \| blob \| history
bamtk.c		patch \| blob \| history
bcftools/bcf.h		patch \| blob \| history
bcftools/bcf.tex		patch \| blob \| history
bcftools/bcftools.1		patch \| blob \| history
bcftools/bcfutils.c		patch \| blob \| history
bcftools/call1.c		patch \| blob \| history
bcftools/prob1.c		patch \| blob \| history
bcftools/prob1.h		patch \| blob \| history
bgzf.c		patch \| blob \| history
bgzf.h		patch \| blob \| history
sam.c		patch \| blob \| history
sam_view.c		patch \| blob \| history
samtools.1		patch \| blob \| history