From 8ef16f0ffefe94ead2c9e367ebcb66490fdea2cb Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 22 Mar 2011 03:08:31 +0000 Subject: [PATCH] Release samtools-0.1.14 (r933:170) --- bam.h | 2 +- bam_plcmd.c | 11 ++++++++++- bcftools/bcftools.1 | 13 +++++++++++-- bcftools/bcfutils.c | 21 +++++++++++++++++++++ bcftools/call1.c | 14 +++++++++++--- samtools.1 | 10 ++++++++++ 6 files changed, 64 insertions(+), 7 deletions(-) diff --git a/bam.h b/bam.h index 84253ae..bc8e3f1 100644 --- a/bam.h +++ b/bam.h @@ -40,7 +40,7 @@ @copyright Genome Research Ltd. */ -#define BAM_VERSION "0.1.13 (r926:161)" +#define BAM_VERSION "0.1.14 (r933:170)" #include #include diff --git a/bam_plcmd.c b/bam_plcmd.c index 368502c..fd75cec 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -534,6 +534,7 @@ int bam_pileup(int argc, char *argv[]) #define MPLP_FMT_SP 0x200 #define MPLP_NO_INDEL 0x400 #define MPLP_EXT_BAQ 0x800 +#define MPLP_ILLUMINA13 0x1000 typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth; @@ -575,6 +576,12 @@ static int mplp_func(void *data, bam1_t *b) skip = (rg && bcf_str2id(ma->conf->rghash, (const char*)(rg+1)) >= 0); if (skip) continue; } + if (ma->conf->flag & MPLP_ILLUMINA13) { + int i; + uint8_t *qual = bam1_qual(b); + for (i = 0; i < b->core.l_qseq; ++i) + qual[i] = qual[i] > 31? qual[i] - 31 : 0; + } has_ref = (ma->ref && ma->ref_id == b->core.tid)? 1 : 0; skip = 0; if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, (ma->conf->flag & MPLP_EXT_BAQ)? 3 : 1); @@ -872,7 +879,7 @@ int bam_mpileup(int argc, char *argv[]) mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; - while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:")) >= 0) { + while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); @@ -891,6 +898,7 @@ int bam_mpileup(int argc, char *argv[]) case 'S': mplp.flag |= MPLP_FMT_SP; break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_EXT_BAQ; break; + case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; @@ -936,6 +944,7 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support); fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac); fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n"); + fprintf(stderr, " -6 quality is in the Illumina-1.3+ encoding\n"); fprintf(stderr, " -A use anomalous read pairs in SNP/INDEL calling\n"); fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); diff --git a/bcftools/bcftools.1 b/bcftools/bcftools.1 index 236abe4..6d11e77 100644 --- a/bcftools/bcftools.1 +++ b/bcftools/bcftools.1 @@ -37,6 +37,8 @@ estimating site allele frequencies and allele frequency spectrums. .IR prior ] .RB [ \-1 .IR nGroup1 ] +.RB [ \-d +.IR minFrac ] .RB [ \-U .IR nPerm ] .RB [ \-X @@ -77,8 +79,10 @@ Skip sites where the REF field is not A/C/G/T Output the QCALL likelihood format .TP .BI -s \ FILE -List of samples to use. In the output, the ordering of samples will be identical -to the one in +List of samples to use. The first column in the input gives the sample names +and the second gives the ploidy, which can only be 1 or 2. When the 2nd column +is absent, the sample ploidy is assumed to be 2. In the output, the ordering of +samples will be identical to the one in .IR FILE . [null] .TP @@ -93,6 +97,11 @@ Uncompressed BCF output (force -b). .B -c Call variants. .TP +.BI -d \ FLOAT +When +.B -v +is in use, skip loci where the fraction of samples covered by reads is below FLOAT. [0] +.TP .B -g Call per-sample genotypes at variant sites (force -c) .TP diff --git a/bcftools/bcfutils.c b/bcftools/bcfutils.c index 91cc2c2..d1b9668 100644 --- a/bcftools/bcfutils.c +++ b/bcftools/bcfutils.c @@ -168,6 +168,27 @@ int bcf_fix_pl(bcf1_t *b) return 0; } +int bcf_smpl_covered(const bcf1_t *b) +{ + int i, j, n = 0; + uint32_t tmp; + bcf_ginfo_t *gi; + // pinpoint PL + tmp = bcf_str2int("PL", 2); + for (i = 0; i < b->n_gi; ++i) + if (b->gi[i].fmt == tmp) break; + if (i == b->n_gi) return 0; + // count how many samples having PL!=[0..0] + gi = b->gi + i; + for (i = 0; i < b->n_smpl; ++i) { + uint8_t *PLi = ((uint8_t*)gi->data) + i * gi->len; + for (j = 0; j < gi->len; ++j) + if (PLi[j]) break; + if (j < gi->len) ++n; + } + return n; +} + static void *locate_field(const bcf1_t *b, const char *fmt, int l) { int i; diff --git a/bcftools/call1.c b/bcftools/call1.c index a520e3c..e074fb5 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -33,7 +33,7 @@ typedef struct { int flag, prior_type, n1, n_sub, *sublist, n_perm; char *fn_list, *prior_file, **subsam, *fn_dict; uint8_t *ploidy; - double theta, pref, indel_frac, min_perm_p; + double theta, pref, indel_frac, min_perm_p, min_smpl_frac; } viewconf_t; khash_t(set64) *bcf_load_pos(const char *fn, bcf_hdr_t *_h) @@ -297,8 +297,8 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); - vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; - while ((c = getopt(argc, argv, "FN1:l:cHAGvbSuP:t:p:QgLi:IMs:D:U:X:")) >= 0) { + vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; + while ((c = getopt(argc, argv, "FN1:l:cHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; case 'l': vc.fn_list = strdup(optarg); break; @@ -322,6 +322,7 @@ int bcfview(int argc, char *argv[]) case 'L': vc.flag |= VC_ADJLD; break; case 'U': vc.n_perm = atoi(optarg); break; case 'X': vc.min_perm_p = atof(optarg); break; + case 'd': vc.min_smpl_frac = atof(optarg); break; case 's': vc.subsam = read_samples(optarg, &vc.n_sub); vc.ploidy = calloc(vc.n_sub + 1, 1); for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1]; @@ -351,6 +352,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -L calculate LD for adjacent sites\n"); fprintf(stderr, " -I skip indels\n"); fprintf(stderr, " -F PL generated by r921 or before\n"); + fprintf(stderr, " -d FLOAT skip loci where less than FLOAT fraction of samples covered [0]\n"); fprintf(stderr, " -D FILE sequence dictionary for VCF->BCF conversion [null]\n"); fprintf(stderr, " -l FILE list of sites to output [all sites]\n"); fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4g]\n", vc.theta); @@ -429,6 +431,12 @@ int bcfview(int argc, char *argv[]) } while (vcf_read(bp, hin, b) > 0) { int is_indel; + if ((vc.flag & VC_VARONLY) && strcmp(b->alt, "X") == 0) continue; + if ((vc.flag & VC_VARONLY) && vc.min_smpl_frac > 0.) { + extern int bcf_smpl_covered(const bcf1_t *b); + int n = bcf_smpl_covered(b); + if ((double)n / b->n_smpl < vc.min_smpl_frac) continue; + } if (vc.n_sub) bcf_subsam(vc.n_sub, vc.sublist, b); if (vc.flag & VC_FIX_PL) bcf_fix_pl(b); is_indel = bcf_is_indel(b); diff --git a/samtools.1 b/samtools.1 index 4834679..ba32392 100644 --- a/samtools.1 +++ b/samtools.1 @@ -249,6 +249,16 @@ with the header in This command is much faster than replacing the header with a BAM->SAM->BAM conversion. +.TP +.B cat +samtools cat [-h header.sam] [-o out.bam] [ ... ] + +Concatenate BAMs. The sequence dictionary of each input BAM must be identical, +although this command does not check this. This command uses a similar trick +to +.B reheader +which enables fast BAM concatenation. + .TP .B sort samtools sort [-no] [-m maxMem] -- 2.39.2