#include "kstring.h"
#include "sam_header.h"
-int bam_is_be = 0, bam_verbose = 2;
+int bam_is_be = 0, bam_verbose = 2, bam_no_B = 0;
char *bam_flag2char_table = "pPuUrR12sfd\0\0\0\0\0";
/**************************
uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar)
{
- uint32_t k, end;
- end = c->pos;
+ int k, end = c->pos;
for (k = 0; k < c->n_cigar; ++k) {
- int op = cigar[k] & BAM_CIGAR_MASK;
- if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP)
- end += cigar[k] >> BAM_CIGAR_SHIFT;
+ int op = bam_cigar_op(cigar[k]);
+ int len = bam_cigar_oplen(cigar[k]);
+ if (op == BAM_CBACK) { // move backward
+ int l, u, v;
+ if (k == c->n_cigar - 1) break; // skip trailing 'B'
+ for (l = k - 1, u = v = 0; l >= 0; --l) {
+ int op1 = bam_cigar_op(cigar[l]);
+ int len1 = bam_cigar_oplen(cigar[l]);
+ if (bam_cigar_type(op1)&1) { // consume query
+ if (u + len1 >= len) { // stop
+ if (bam_cigar_type(op1)&2) v += len - u;
+ break;
+ } else u += len1;
+ }
+ if (bam_cigar_type(op1)&2) v += len1;
+ }
+ end = l < 0? c->pos : end - v;
+ } else if (bam_cigar_type(op)&2) end += bam_cigar_oplen(cigar[k]);
}
return end;
}
{
uint32_t k;
int32_t l = 0;
- for (k = 0; k < c->n_cigar; ++k) {
- int op = cigar[k] & BAM_CIGAR_MASK;
- if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP || op == BAM_CEQUAL || op == BAM_CDIFF)
- l += cigar[k] >> BAM_CIGAR_SHIFT;
- }
+ for (k = 0; k < c->n_cigar; ++k)
+ if (bam_cigar_type(bam_cigar_op(cigar[k]))&1)
+ l += bam_cigar_oplen(cigar[k]);
return l;
}
if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4;
b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
+ if (bam_no_B) bam_remove_B(b);
return 4 + block_len;
}
kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str);
if (c->n_cigar == 0) kputc('*', &str);
else {
+ uint32_t *cigar = bam1_cigar(b);
for (i = 0; i < c->n_cigar; ++i) {
kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
- kputc("MIDNSHP=X"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str);
+ kputc(bam_cigar_opchr(cigar[i]), &str);
}
}
kputc('\t', &str);
rg = bam_aux_get(b, "RG");
return (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1));
}
+
+/************
+ * Remove B *
+ ************/
+
+int bam_remove_B(bam1_t *b)
+{
+ int i, j, end_j, k, l, no_qual;
+ uint32_t *cigar, *new_cigar;
+ uint8_t *seq, *qual, *p;
+ // test if removal is necessary
+ if (b->core.flag & BAM_FUNMAP) return 0; // unmapped; do nothing
+ cigar = bam1_cigar(b);
+ for (k = 0; k < b->core.n_cigar; ++k)
+ if (bam_cigar_op(cigar[k]) == BAM_CBACK) break;
+ if (k == b->core.n_cigar) return 0; // no 'B'
+ if (bam_cigar_op(cigar[0]) == BAM_CBACK) goto rmB_err; // cannot be removed
+ // allocate memory for the new CIGAR
+ if (b->data_len + (b->core.n_cigar + 1) * 4 > b->m_data) { // not enough memory
+ b->m_data = b->data_len + b->core.n_cigar * 4;
+ kroundup32(b->m_data);
+ b->data = (uint8_t*)realloc(b->data, b->m_data);
+ cigar = bam1_cigar(b); // after realloc, cigar may be changed
+ }
+ new_cigar = (uint32_t*)(b->data + (b->m_data - b->core.n_cigar * 4)); // from the end of b->data
+ // the core loop
+ seq = bam1_seq(b); qual = bam1_qual(b);
+ no_qual = (qual[0] == 0xff); // test whether base quality is available
+ i = j = 0; end_j = -1;
+ for (k = l = 0; k < b->core.n_cigar; ++k) {
+ int op = bam_cigar_op(cigar[k]);
+ int len = bam_cigar_oplen(cigar[k]);
+ if (op == BAM_CBACK) { // the backward operation
+ int t, u;
+ if (k == b->core.n_cigar - 1) break; // ignore 'B' at the end of CIGAR
+ if (len > j) goto rmB_err; // an excessively long backward
+ for (t = l - 1, u = 0; t >= 0; --t) { // look back
+ int op1 = bam_cigar_op(new_cigar[t]);
+ int len1 = bam_cigar_oplen(new_cigar[t]);
+ if (bam_cigar_type(op1)&1) { // consume the query
+ if (u + len1 >= len) { // stop
+ new_cigar[t] -= (len - u) << BAM_CIGAR_SHIFT;
+ break;
+ } else u += len1;
+ }
+ }
+ if (bam_cigar_oplen(new_cigar[t]) == 0) --t; // squeeze out the zero-length operation
+ l = t + 1;
+ end_j = j; j -= len;
+ } else { // other CIGAR operations
+ new_cigar[l++] = cigar[k];
+ if (bam_cigar_type(op)&1) { // consume the query
+ if (i != j) { // no need to copy if i == j
+ int u, c, c0;
+ for (u = 0; u < len; ++u) { // construct the consensus
+ c = bam1_seqi(seq, i+u);
+ if (j + u < end_j) { // in an overlap
+ c0 = bam1_seqi(seq, j+u);
+ if (c != c0) { // a mismatch; choose the better base
+ if (qual[j+u] < qual[i+u]) { // the base in the 2nd segment is better
+ bam1_seq_seti(seq, j+u, c);
+ qual[j+u] = qual[i+u] - qual[j+u];
+ } else qual[j+u] -= qual[i+u]; // the 1st is better; reduce base quality
+ } else qual[j+u] = qual[j+u] > qual[i+u]? qual[j+u] : qual[i+u];
+ } else { // not in an overlap; copy over
+ bam1_seq_seti(seq, j+u, c);
+ qual[j+u] = qual[i+u];
+ }
+ }
+ }
+ i += len, j += len;
+ }
+ }
+ }
+ if (no_qual) qual[0] = 0xff; // in very rare cases, this may be modified
+ // merge adjacent operations if possible
+ for (k = 1; k < l; ++k)
+ if (bam_cigar_op(new_cigar[k]) == bam_cigar_op(new_cigar[k-1]))
+ new_cigar[k] += new_cigar[k-1] >> BAM_CIGAR_SHIFT << BAM_CIGAR_SHIFT, new_cigar[k-1] &= 0xf;
+ // kill zero length operations
+ for (k = i = 0; k < l; ++k)
+ if (new_cigar[k] >> BAM_CIGAR_SHIFT)
+ new_cigar[i++] = new_cigar[k];
+ l = i;
+ // update b
+ memcpy(cigar, new_cigar, l * 4); // set CIGAR
+ p = b->data + b->core.l_qname + l * 4;
+ memmove(p, seq, (j+1)>>1); p += (j+1)>>1; // set SEQ
+ memmove(p, qual, j); p += j; // set QUAL
+ memmove(p, bam1_aux(b), b->l_aux); p += b->l_aux; // set optional fields
+ b->core.n_cigar = l, b->core.l_qseq = j; // update CIGAR length and query length
+ b->data_len = p - b->data; // update record length
+ return 0;
+
+rmB_err:
+ b->core.flag |= BAM_FUNMAP;
+ return -1;
+}
@copyright Genome Research Ltd.
*/
-#define BAM_VERSION "0.1.18 (r982:295)"
+#define BAM_VERSION "0.1.18-dev (r982:310)"
#include <stdint.h>
#include <stdlib.h>
char **target_name;
uint32_t *target_len;
void *dict, *hash, *rg2lib;
- size_t l_text, n_text;
+ uint32_t l_text, n_text;
char *text;
} bam_header_t;
/*! @abstract CIGAR: P = padding */
#define BAM_CPAD 6
/*! @abstract CIGAR: equals = match */
-#define BAM_CEQUAL 7
+#define BAM_CEQUAL 7
/*! @abstract CIGAR: X = mismatch */
-#define BAM_CDIFF 8
+#define BAM_CDIFF 8
+#define BAM_CBACK 9
-#define BAM_CIGAR_STR "MIDNSHP=X"
+#define BAM_CIGAR_STR "MIDNSHP=XB"
+#define BAM_CIGAR_TYPE 0x3C1A7
#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
-#define bam_cigar_gen(o, l) ((o)<<BAM_CIGAR_SHIFT|(l))
+#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
+#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference
/*! @typedef
@abstract Structure for core alignment information.
@param i The i-th position, 0-based
@return 4-bit integer representing the base.
*/
-#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
+//#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
+#define bam1_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)
+
+#define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = ((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) )
/*! @function
@abstract Get query sequence and quality
*/
extern int bam_verbose;
+extern int bam_no_B;
+
/*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */
extern unsigned char bam_nt16_table[256];
*/
int bam_read1(bamFile fp, bam1_t *b);
+ int bam_remove_B(bam1_t *b);
+
/*!
@abstract Write an alignment to BAM.
@param fp BAM file handler
}
// fill the bases array
memset(r, 0, sizeof(bcf_callret1_t));
- for (i = n = 0; i < _n; ++i) {
+ for (i = n = r->n_supp = 0; i < _n; ++i) {
const bam_pileup1_t *p = pl + i;
int q, b, mapQ, baseQ, is_diff, min_dist, seqQ;
// set base
b = p->aux>>16&0x3f;
is_diff = (b != 0);
}
+ if (is_diff) ++r->n_supp;
bca->bases[n++] = q<<5 | (int)bam1_strand(p->b)<<4 | b;
// collect annotations
if (b < 4) r->qsum[b] += q;
return 0;
}
-int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP,
+int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag,
const bcf_callaux_t *bca, const char *ref)
{
extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two);
if (i) kputc(',', &s);
kputw(bc->anno[i], &s);
}
- if ( bc->vdb!=1 )
- {
+ if (bc->vdb != 1)
ksprintf(&s, ";VDB=%.4f", bc->vdb);
- }
kputc('\0', &s);
// FMT
kputs("PL", &s);
- if (bcr) {
- kputs(":DP", &s);
- if (is_SP) kputs(":SP", &s);
+ if (bcr && fmt_flag) {
+ if (fmt_flag & B2B_FMT_DP) kputs(":DP", &s);
+ if (fmt_flag & B2B_FMT_DV) kputs(":DV", &s);
+ if (fmt_flag & B2B_FMT_SP) kputs(":SP", &s);
}
kputc('\0', &s);
b->m_str = s.m; b->str = s.s; b->l_str = s.l;
bcf_sync(b);
memcpy(b->gi[0].data, bc->PL, b->gi[0].len * bc->n);
- if (bcr) {
- uint16_t *dp = (uint16_t*)b->gi[1].data;
- int32_t *sp = is_SP? b->gi[2].data : 0;
+ if (bcr && fmt_flag) {
+ uint16_t *dp = (fmt_flag & B2B_FMT_DP)? b->gi[1].data : 0;
+ uint16_t *dv = (fmt_flag & B2B_FMT_DV)? b->gi[1 + ((fmt_flag & B2B_FMT_DP) != 0)].data : 0;
+ int32_t *sp = (fmt_flag & B2B_FMT_DV)? b->gi[1 + ((fmt_flag & B2B_FMT_DP) != 0) + ((fmt_flag & B2B_FMT_DV) != 0)].data : 0;
for (i = 0; i < bc->n; ++i) {
bcf_callret1_t *p = bcr + i;
- dp[i] = p->depth < 0xffff? p->depth : 0xffff;
- if (is_SP) {
+ if (dp) dp[i] = p->depth < 0xffff? p->depth : 0xffff;
+ if (dv) dv[i] = p->n_supp < 0xffff? p->n_supp : 0xffff;
+ if (sp) {
if (p->anno[0] + p->anno[1] < 2 || p->anno[2] + p->anno[3] < 2
|| p->anno[0] + p->anno[2] < 2 || p->anno[1] + p->anno[3] < 2)
{
#define B2B_INDEL_NULL 10000
+#define B2B_FMT_DP 0x1
+#define B2B_FMT_SP 0x2
+#define B2B_FMT_DV 0x4
+
typedef struct __bcf_callaux_t {
int capQ, min_baseQ;
int openQ, extQ, tandemQ; // for indels
} bcf_callaux_t;
typedef struct {
- int depth, ori_depth, qsum[4];
+ int depth, n_supp, ori_depth, qsum[4];
int anno[16];
float p[25];
int mvd[3]; // mean variant distance, number of variant reads, average read length
typedef struct {
int a[5]; // alleles: ref, alt, alt2, alt3
int n, n_alleles, shift, ori_ref, unseen;
+ int n_supp; // number of supporting non-reference reads
int anno[16], depth, ori_depth;
uint8_t *PL;
float vdb; // variant distance bias
void bcf_call_destroy(bcf_callaux_t *bca);
int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r);
int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call);
- int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP,
+ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag,
const bcf_callaux_t *bca, const char *ref);
int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
const void *rghash);
// Sanity check
if ( header->l_text+str->l+1 >= header->n_text )
{
- fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n", header->l_text+str->l+1,header->n_text,x,y);
+ fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n", header->l_text+str->l+1,(long)header->n_text,x,y);
abort();
}
strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here.
if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -3;
z += str->l + 1;
if (str->s[0] != '*') {
+ uint32_t *cigar;
for (s = str->s; *s; ++s) {
if ((isalpha(*s)) || (*s=='=')) ++c->n_cigar;
else if (!isdigit(*s)) parse_error(fp->n_lines, "invalid CIGAR character");
}
b->data = alloc_data(b, doff + c->n_cigar * 4);
+ cigar = bam1_cigar(b);
for (i = 0, s = str->s; i != c->n_cigar; ++i) {
x = strtol(s, &t, 10);
op = toupper(*t);
else if (op == 'P') op = BAM_CPAD;
else if (op == '=') op = BAM_CEQUAL;
else if (op == 'X') op = BAM_CDIFF;
+ else if (op == 'B') op = BAM_CBACK;
else parse_error(fp->n_lines, "invalid CIGAR operation");
s = t + 1;
- bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op;
+ cigar[i] = bam_cigar_gen(x, op);
}
if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation");
- c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b)));
+ c->bin = bam_reg2bin(c->pos, bam_calend(c, cigar));
doff += c->n_cigar * 4;
} else {
if (!(c->flag&BAM_FUNMAP)) {
if (strcmp(str->s, "*")) {
c->l_qseq = strlen(str->s);
if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) {
- fprintf(stderr, "Line %ld, sequence length %i vs %i from CIGAR\n",
- (long)fp->n_lines, c->l_qseq, (int32_t)bam_cigar2qlen(c, bam1_cigar(b)));
- parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
+ fprintf(stderr, "Line %ld, sequence length %i vs %i from CIGAR\n",
+ (long)fp->n_lines, c->l_qseq, (int32_t)bam_cigar2qlen(c, bam1_cigar(b)));
+ parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
}
p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;
memset(p, 0, (c->l_qseq+1)/2);
}
b->l_aux = doff - doff0;
b->data_len = doff;
+ if (bam_no_B) bam_remove_B(b);
return z;
}
{
bam_header_t *header;
bam1_t *b[2];
- int curr, has_prev, pre_end, cur_end;
+ int curr, has_prev, pre_end = 0, cur_end;
kstring_t str;
str.l = str.m = 0; str.s = 0;
#define MPLP_NO_COMP 0x20
#define MPLP_NO_ORPHAN 0x40
#define MPLP_REALN 0x80
-#define MPLP_FMT_DP 0x100
-#define MPLP_FMT_SP 0x200
#define MPLP_NO_INDEL 0x400
#define MPLP_EXT_BAQ 0x800
#define MPLP_ILLUMINA13 0x1000
int bed_overlap(const void *_h, const char *chr, int beg, int end);
typedef struct {
- int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth;
+ int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
int openQ, extQ, tandemQ, min_support; // for indels
double min_frac; // for indels
char *reg, *pl_list;
for (i = 0; i < gplp.n; ++i)
bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
bcf_call_combine(gplp.n, bcr, ref16, &bc);
- bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
- (conf->flag&MPLP_FMT_SP), 0, 0);
+ bcf_call2bcf(tid, pos, &bc, b, bcr, conf->fmt_flag, 0, 0);
bcf_write(bp, bh, b);
bcf_destroy(b);
// call indels
bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) {
b = calloc(1, sizeof(bcf1_t));
- bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
- (conf->flag&MPLP_FMT_SP), bca, ref);
+ bcf_call2bcf(tid, pos, &bc, b, bcr, conf->fmt_flag, bca, ref);
bcf_write(bp, bh, b);
bcf_destroy(b);
}
} else {
printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
for (i = 0; i < n; ++i) {
- int j;
- printf("\t%d\t", n_plp[i]);
+ int j, cnt;
+ for (j = cnt = 0; j < n_plp[i]; ++j) {
+ const bam_pileup1_t *p = plp[i] + j;
+ if (bam1_qual(p->b)[p->qpos] >= conf->min_baseQ) ++cnt;
+ }
+ printf("\t%d\t", cnt);
if (n_plp[i] == 0) {
printf("*\t*"); // FIXME: printf() is very slow...
if (conf->flag & MPLP_PRINT_POS) printf("\t*");
} else {
- for (j = 0; j < n_plp[i]; ++j)
- pileup_seq(plp[i] + j, pos, ref_len, ref);
+ for (j = 0; j < n_plp[i]; ++j) {
+ const bam_pileup1_t *p = plp[i] + j;
+ if (bam1_qual(p->b)[p->qpos] >= conf->min_baseQ)
+ pileup_seq(plp[i] + j, pos, ref_len, ref);
+ }
putchar('\t');
for (j = 0; j < n_plp[i]; ++j) {
const bam_pileup1_t *p = plp[i] + j;
- int c = bam1_qual(p->b)[p->qpos] + 33;
- if (c > 126) c = 126;
- putchar(c);
+ int c = bam1_qual(p->b)[p->qpos];
+ if (c >= conf->min_baseQ) {
+ c = c + 33 < 126? c + 33 : 126;
+ putchar(c);
+ }
}
if (conf->flag & MPLP_PRINT_MAPQ) {
putchar('\t');
int nfiles = 0, use_orphan = 0;
mplp_conf_t mplp;
memset(&mplp, 0, sizeof(mplp_conf_t));
- #define MPLP_PRINT_POS 0x4000
mplp.max_mq = 60;
mplp.min_baseQ = 13;
mplp.capQ_thres = 0;
mplp.max_depth = 250; mplp.max_indel_depth = 250;
mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
mplp.min_frac = 0.002; mplp.min_support = 1;
- mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
- while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6Os")) >= 0) {
+ mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_EXT_BAQ;
+ while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6OsV")) >= 0) {
switch (c) {
case 'f':
mplp.fai = fai_load(optarg);
case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break;
case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break;
case 'B': mplp.flag &= ~MPLP_REALN; break;
- case 'D': mplp.flag |= MPLP_FMT_DP; break;
- case 'S': mplp.flag |= MPLP_FMT_SP; break;
+ case 'D': mplp.fmt_flag |= B2B_FMT_DP; break;
+ case 'S': mplp.fmt_flag |= B2B_FMT_SP; break;
+ case 'V': mplp.fmt_flag |= B2B_FMT_DV; break;
case 'I': mplp.flag |= MPLP_NO_INDEL; break;
case 'E': mplp.flag |= MPLP_EXT_BAQ; break;
case '6': mplp.flag |= MPLP_ILLUMINA13; break;
fprintf(stderr, " -b FILE list of input BAM files [null]\n");
fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n");
fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth);
- fprintf(stderr, " -E extended BAQ for higher sensitivity but lower specificity\n");
+// fprintf(stderr, " -E extended BAQ for higher sensitivity but lower specificity\n");
fprintf(stderr, " -f FILE faidx indexed reference sequence file [null]\n");
fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n");
fprintf(stderr, " -l FILE list of positions (chr pos) or regions (BED) [null]\n");
fprintf(stderr, "Notes: Assuming diploid individuals.\n\n");
return 1;
}
+ bam_no_B = 1;
if (file_list) {
if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
mpileup(&mplp,nfiles,fn);
fprintf(stderr, " cat concatenate BAMs\n");
fprintf(stderr, " targetcut cut fosmid regions (for fosmid pool only)\n");
fprintf(stderr, " phase phase heterozygotes\n");
- fprintf(stderr, " pad2unpad convert padded BAM to unpadded BAM\n");
+// fprintf(stderr, " depad convert padded BAM to unpadded BAM\n"); // not stable
fprintf(stderr, "\n");
#ifdef _WIN32
fprintf(stderr, "\
else if (strcmp(argv[1], "depth") == 0) return main_depth(argc-1, argv+1);
else if (strcmp(argv[1], "bam2fq") == 0) return main_bam2fq(argc-1, argv+1);
else if (strcmp(argv[1], "pad2unpad") == 0) return main_pad2unpad(argc-1, argv+1);
+ else if (strcmp(argv[1], "depad") == 0) return main_pad2unpad(argc-1, argv+1);
else if (strcmp(argv[1], "pileup") == 0) {
fprintf(stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
return 1;
for (i = 0; i < b->n_gi; ++i) {
if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2)) {
+ } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
b->gi[i].len = 2;
} else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) {
b->gi[i].len = 1;
if (k > 0) kputc(',', s);
kputw(d[k], s);
}
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
+ } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
kputw(((uint16_t*)b->gi[i].data)[j], s);
} else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
kputw(((uint8_t*)b->gi[i].data)[j], s);
((uint8_t*)b->gi[i].data)[k-9] = 0;
} else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
((int32_t*)b->gi[i].data)[k-9] = 0;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
+ } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
((uint16_t*)b->gi[i].data)[k-9] = 0;
} else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
int y = b->n_alleles * (b->n_alleles + 1) / 2;
int x = strtol(q, &q, 10);
if (x > 0xffff) x = 0xffff;
((uint32_t*)b->gi[i].data)[k-9] = x;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
+ } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
int x = strtol(q, &q, 10);
if (x > 0xffff) x = 0xffff;
((uint16_t*)b->gi[i].data)[k-9] = x;
{
bamFile in, out;
if (argc == 1) {
- fprintf(stderr, "Usage: samtools pad2unpad <in.bam>\n");
+ fprintf(stderr, "Usage: samtools depad <in.bam>\n");
return 1;
}
in = strcmp(argv[1], "-")? bam_open(argv[1], "r") : bam_dopen(fileno(stdin), "r");
// FIXME: we'd better use no global variables...
static rghash_t g_rghash = 0;
-static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
+static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0, g_qual_scale = 0;
static float g_subsam = -1;
static char *g_library, *g_rg;
static void *g_bed;
void bed_destroy(void *_h);
int bed_overlap(const void *_h, const char *chr, int beg, int end);
-static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)
+static int process_aln(const bam_header_t *h, bam1_t *b)
{
+ if (g_qual_scale > 1) {
+ int i;
+ uint8_t *qual = bam1_qual(b);
+ for (i = 0; i < b->core.l_qseq; ++i) {
+ int c = qual[i] * g_qual_scale;
+ qual[i] = c < 93? c : 93;
+ }
+ }
if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))
return 1;
if (g_bed && b->core.tid >= 0 && !bed_overlap(g_bed, h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b))))
// callback function for bam_fetch() that prints nonskipped records
static int view_func(const bam1_t *b, void *data)
{
- if (!__g_skip_aln(((samfile_t*)data)->header, b))
+ if (!process_aln(((samfile_t*)data)->header, (bam1_t*)b))
samwrite((samfile_t*)data, b);
return 0;
}
// callback function for bam_fetch() that counts nonskipped records
static int count_func(const bam1_t *b, void *data)
{
- if (!__g_skip_aln(((count_func_data_t*)data)->header, b)) {
+ if (!process_aln(((count_func_data_t*)data)->header, (bam1_t*)b)) {
(*((count_func_data_t*)data)->count)++;
}
return 0;
/* parse command-line options */
strcpy(in_mode, "r"); strcpy(out_mode, "w");
- while ((c = getopt(argc, argv, "Sbct:h1Ho:q:f:F:ul:r:xX?T:R:L:s:")) >= 0) {
+ while ((c = getopt(argc, argv, "SbBct:h1Ho:q:f:F:ul:r:xX?T:R:L:s:Q:")) >= 0) {
switch (c) {
case 's': g_subsam = atof(optarg); break;
case 'c': is_count = 1; break;
case 'X': of_type = BAM_OFSTR; break;
case '?': is_long_help = 1; break;
case 'T': fn_ref = strdup(optarg); is_bamin = 0; break;
+ case 'B': bam_no_B = 1; break;
+ case 'Q': g_qual_scale = atoi(optarg); break;
default: return usage(is_long_help);
}
}
bam1_t *b = bam_init1();
int r;
while ((r = samread(in, b)) >= 0) { // read one alignment from `in'
- if (!__g_skip_aln(in->header, b)) {
+ if (!process_aln(in->header, b)) {
if (!is_count) samwrite(out, b); // write the alignment to `out'
count++;
}
fprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\n");
fprintf(stderr, " -X output FLAG in string (samtools-C specific)\n");
fprintf(stderr, " -c print only the count of matching records\n");
+ fprintf(stderr, " -B collapse the backward CIGAR operation\n");
fprintf(stderr, " -L FILE output alignments overlapping the input BED FILE [null]\n");
fprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\n");
fprintf(stderr, " -T FILE reference sequence file (force -S) [null]\n");