AOBJS= bam_tview.o bam_plcmd.o sam_view.o \
bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \
bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \
- cut_target.o phase.o bam2depth.o padding.o
+ cut_target.o phase.o bam2depth.o padding.o bedcov.o
PROG= samtools
INCLUDES= -I.
SUBDIRS= . bcftools misc
@copyright Genome Research Ltd.
*/
-#define BAM_VERSION "0.1.18-master-r567"
+#define BAM_VERSION "0.1.18-r572"
#include <stdint.h>
#include <stdlib.h>
int main_depth(int argc, char *argv[]);
int main_bam2fq(int argc, char *argv[]);
int main_pad2unpad(int argc, char *argv[]);
+int main_bedcov(int argc, char *argv[]);
int faidx_main(int argc, char *argv[]);
fprintf(stderr, " rmdup remove PCR duplicates\n");
fprintf(stderr, " reheader replace BAM header\n");
fprintf(stderr, " cat concatenate BAMs\n");
+ fprintf(stderr, " bedcov read depth per BED region\n");
fprintf(stderr, " targetcut cut fosmid regions (for fosmid pool only)\n");
fprintf(stderr, " phase phase heterozygotes\n");
// fprintf(stderr, " depad convert padded BAM to unpadded BAM\n"); // not stable
else if (strcmp(argv[1], "bam2fq") == 0) return main_bam2fq(argc-1, argv+1);
else if (strcmp(argv[1], "pad2unpad") == 0) return main_pad2unpad(argc-1, argv+1);
else if (strcmp(argv[1], "depad") == 0) return main_pad2unpad(argc-1, argv+1);
+ else if (strcmp(argv[1], "bedcov") == 0) return main_bedcov(argc-1, argv+1);
else if (strcmp(argv[1], "pileup") == 0) {
fprintf(stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
return 1;
--- /dev/null
+#include <zlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "kstring.h"
+#include "bgzf.h"
+#include "bam.h"
+
+#include "kseq.h"
+KSTREAM_INIT(gzFile, gzread, 16384)
+
+typedef struct {
+ bamFile fp;
+ bam_iter_t iter;
+ int min_mapQ;
+} aux_t;
+
+static int read_bam(void *data, bam1_t *b)
+{
+ aux_t *aux = (aux_t*)data;
+ int ret = bam_iter_read(aux->fp, aux->iter, b);
+ if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP;
+ return ret;
+}
+
+int main_bedcov(int argc, char *argv[])
+{
+ extern void bam_init_header_hash(bam_header_t*);
+ gzFile fp;
+ kstring_t str;
+ kstream_t *ks;
+ bam_index_t **idx;
+ bam_header_t *h = 0;
+ aux_t **aux;
+ int *n_plp, dret, i, n, c, min_mapQ = 0;
+ int64_t *cnt;
+ const bam_pileup1_t **plp;
+
+ while ((c = getopt(argc, argv, "Q:")) >= 0) {
+ switch (c) {
+ case 'Q': min_mapQ = atoi(optarg); break;
+ }
+ }
+ if (optind + 2 > argc) {
+ fprintf(stderr, "Usage: samtools bedcov <in.bed> <in1.bam> [...]\n");
+ return 1;
+ }
+ memset(&str, 0, sizeof(kstring_t));
+ n = argc - optind - 1;
+ aux = calloc(n, sizeof(void*));
+ idx = calloc(n, sizeof(void*));
+ for (i = 0; i < n; ++i) {
+ aux[i] = calloc(1, sizeof(aux_t));
+ aux[i]->min_mapQ = min_mapQ;
+ aux[i]->fp = bam_open(argv[i+optind+1], "r");
+ idx[i] = bam_index_load(argv[i+optind+1]);
+ if (aux[i]->fp == 0 || idx[i] == 0) {
+ fprintf(stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
+ return 2;
+ }
+ bgzf_set_cache_size(aux[i]->fp, 20);
+ if (i == 0) h = bam_header_read(aux[0]->fp);
+ }
+ bam_init_header_hash(h);
+ cnt = calloc(n, 8);
+
+ fp = gzopen(argv[optind], "rb");
+ ks = ks_init(fp);
+ n_plp = calloc(n, sizeof(int));
+ plp = calloc(n, sizeof(void*));
+ while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
+ char *p, *q;
+ int tid, beg, end, pos;
+ bam_mplp_t mplp;
+
+ for (p = q = str.s; *p && *p != '\t'; ++p);
+ if (*p != '\t') goto bed_error;
+ *p = 0; tid = bam_get_tid(h, q); *p = '\t';
+ if (tid < 0) goto bed_error;
+ for (q = p = p + 1; isdigit(*p); ++p);
+ if (*p != '\t') goto bed_error;
+ *p = 0; beg = atoi(q); *p = '\t';
+ for (q = p = p + 1; isdigit(*p); ++p);
+ if (*p == '\t' || *p == 0) {
+ int c = *p;
+ *p = 0; end = atoi(q); *p = c;
+ } else goto bed_error;
+
+ for (i = 0; i < n; ++i) {
+ if (aux[i]->iter) bam_iter_destroy(aux[i]->iter);
+ aux[i]->iter = bam_iter_query(idx[i], tid, beg, end);
+ }
+ mplp = bam_mplp_init(n, read_bam, (void**)aux);
+ bam_mplp_set_maxcnt(mplp, 64000);
+ memset(cnt, 0, 8 * n);
+ while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
+ if (pos >= beg && pos < end)
+ for (i = 0; i < n; ++i) cnt[i] += n_plp[i];
+ for (i = 0; i < n; ++i) {
+ kputc('\t', &str);
+ kputl(cnt[i], &str);
+ }
+ puts(str.s);
+ bam_mplp_destroy(mplp);
+ continue;
+
+bed_error:
+ fprintf(stderr, "Errors in BED line '%s'\n", str.s);
+ }
+ free(n_plp); free(plp);
+ ks_destroy(ks);
+ gzclose(fp);
+
+ free(cnt);
+ for (i = 0; i < n; ++i) {
+ if (aux[i]->iter) bam_iter_destroy(aux[i]->iter);
+ bam_index_destroy(idx[i]);
+ bam_close(aux[i]->fp);
+ free(aux[i]);
+ }
+ bam_header_destroy(h);
+ free(aux); free(idx);
+ free(str.s);
+ return 0;
+}
SOFTWARE.
*/
-/* Last Modified: 18AUG2011 */
+/* Last Modified: 05MAR2012 */
#ifndef AC_KSEQ_H
#define AC_KSEQ_H
#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
#define KS_SEP_TAB 1 // isspace() && !' '
-#define KS_SEP_MAX 1
+#define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
+#define KS_SEP_MAX 2
#define __KS_TYPE(type_t) \
typedef struct __kstream_t { \
{ \
kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
ks->f = f; \
- ks->buf = malloc(__bufsize); \
+ ks->buf = (unsigned char*)malloc(__bufsize); \
return ks; \
} \
static inline void ks_destroy(kstream_t *ks) \
if (ks->end == 0) break; \
} else break; \
} \
- if (delimiter > KS_SEP_MAX) { \
+ if (delimiter == KS_SEP_LINE) { \
+ for (i = ks->begin; i < ks->end; ++i) \
+ if (ks->buf[i] == '\n') break; \
+ } else if (delimiter > KS_SEP_MAX) { \
for (i = ks->begin; i < ks->end; ++i) \
if (ks->buf[i] == delimiter) break; \
} else if (delimiter == KS_SEP_SPACE) { \
for (i = ks->begin; i < ks->end; ++i) \
if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
} else i = 0; /* never come to here! */ \
- if (str->m - str->l < i - ks->begin + 1) { \
+ if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
str->m = str->l + (i - ks->begin) + 1; \
kroundup32(str->m); \
str->s = (char*)realloc(str->s, str->m); \
if (str->s == 0) { \
str->m = 1; \
str->s = (char*)calloc(1, 1); \
- } \
+ } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
str->s[str->l] = '\0'; \
return str->l; \
} \
__KS_GETC(__read, __bufsize) \
__KS_GETUNTIL(__read, __bufsize)
-#define __KSEQ_BASIC(type_t) \
- static inline kseq_t *kseq_init(type_t fd) \
+#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
+
+#define __KSEQ_BASIC(SCOPE, type_t) \
+ SCOPE kseq_t *kseq_init(type_t fd) \
{ \
kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
s->f = ks_init(fd); \
return s; \
} \
- static inline void kseq_rewind(kseq_t *ks) \
- { \
- ks->last_char = 0; \
- ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
- } \
- static inline void kseq_destroy(kseq_t *ks) \
+ SCOPE void kseq_destroy(kseq_t *ks) \
{ \
if (!ks) return; \
free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
-1 end-of-file
-2 truncated quality string
*/
-#define __KSEQ_READ \
- static int kseq_read(kseq_t *seq) \
+#define __KSEQ_READ(SCOPE) \
+ SCOPE int kseq_read(kseq_t *seq) \
{ \
int c; \
kstream_t *ks = seq->f; \
} /* else: the first header char has been read in the previous call */ \
seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
- if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); /* read FASTA/Q comment */ \
+ if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
seq->seq.m = 256; \
seq->seq.s = (char*)malloc(seq->seq.m); \
} \
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+ if (c == '\n') continue; /* skip empty lines */ \
seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
- ks_getuntil2(ks, '\n', &seq->seq, 0, 1); /* read the rest of the line */ \
+ ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
} \
if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
} \
while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
if (c == -1) return -2; /* error: no quality string */ \
- while (ks_getuntil2(ks, '\n', &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
+ while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
seq->last_char = 0; /* we have not come to the next header line */ \
if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
return seq->seq.l; \
kstream_t *f; \
} kseq_t;
-#define KSEQ_INIT(type_t, __read) \
+#define KSEQ_INIT2(SCOPE, type_t, __read) \
KSTREAM_INIT(type_t, __read, 16384) \
__KSEQ_TYPE(type_t) \
- __KSEQ_BASIC(type_t) \
- __KSEQ_READ
+ __KSEQ_BASIC(SCOPE, type_t) \
+ __KSEQ_READ(SCOPE)
+
+#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
+
+#define KSEQ_DECLARE(type_t) \
+ __KS_TYPE(type_t) \
+ __KSEQ_TYPE(type_t) \
+ extern kseq_t *kseq_init(type_t fd); \
+ void kseq_destroy(kseq_t *ks); \
+ int kseq_read(kseq_t *seq);
#endif
return 0;
}
+static inline int kputl(long c, kstring_t *s)
+{
+ char buf[32];
+ long l, x;
+ if (c == 0) return kputc('0', s);
+ for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
+ if (c < 0) buf[l++] = '-';
+ if (s->l + l + 1 >= s->m) {
+ s->m = s->l + l + 2;
+ kroundup32(s->m);
+ s->s = (char*)realloc(s->s, s->m);
+ }
+ for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
+ s->s[s->l] = 0;
+ return 0;
+}
+
static inline int *ksplit(kstring_t *s, int delimiter, int *n)
{
int max = 0, *offsets = 0;
}
}
-size_t getline(char **line, size_t *n, FILE *fp)
+size_t mygetline(char **line, size_t *n, FILE *fp)
{
if (line == NULL || n == NULL || fp == NULL)
{
ssize_t nread;
int warned = 0;
int prev_tid=-1, prev_pos=-1;
- while ((nread = getline(&line, &len, fp)) != -1)
+ while ((nread = mygetline(&line, &len, fp)) != -1)
{
if ( line[0] == '#' ) continue;