DFLAGS= -D_IOLIB=2 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 #-D_NO_RAZF #-D_NO_CURSES
OBJS= bam.o bam_import.o bam_pileup.o bam_lpileup.o bam_sort.o bam_index.o \
razf.o bgzf.o faidx.o bam_tview.o bam_maqcns.o bam_aux.o bam_plcmd.o \
- bam_mate.o bam_rmdup.o glf.o bam_stat.o kstring.o bam_md.o
+ bam_mate.o bam_rmdup.o glf.o bam_stat.o kstring.o bam_md.o sam.o sam_view.o
PROG= razip bgzip samtools
INCLUDES=
LIBS= -lm -lz
$(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(LIBS)
razip.o:razf.h
-bam.o:bam.h razf.h bam_endian.h
+bam.o:bam.h razf.h bam_endian.h kstring.h
+sam.o:sam.h bam.h
bam_import.o:bam.h kseq.h khash.h razf.h
bam_pileup.o:bam.h razf.h ksort.h
bam_plcmd.o:bam.h faidx.h bam_maqcns.h glf.h
DFLAGS= -D_IOLIB=2 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_NO_CURSES -D_NO_RAZF
OBJS= bam.o bam_import.o bam_pileup.o bam_lpileup.o bam_sort.o bam_index.o \
bgzf.o faidx.o bam_tview.o bam_maqcns.o bam_aux.o bam_plcmd.o \
- bam_mate.o bam_rmdup.o glf.o bam_stat.o kstring.o bam_md.o
+ bam_mate.o bam_rmdup.o glf.o bam_stat.o kstring.o bam_md.o sam.o sam_view.o
PROG= samtools
INCLUDES=
LIBS= -lm -lz
$(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(LIBS)
razip.o:razf.h
-bam.o:bam.h razf.h bam_endian.h
+bam.o:bam.h razf.h bam_endian.h kstring.h
+sam.o:sam.h bam.h
bam_import.o:bam.h kseq.h khash.h razf.h
bam_pileup.o:bam.h razf.h ksort.h
-bam_plcmd.o:bam.h faidx.h bam_maqcns.h
+bam_plcmd.o:bam.h faidx.h bam_maqcns.h glf.h
bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h
bam_lpileup.o:bam.h ksort.h
bam_tview.o:bam.h faidx.h bam_maqcns.h
bam_sort.o:bam.h ksort.h razf.h
bam_md.o:bam.h faidx.h
razf.o:razf.h
+glf.o:glf.h
faidx.o:faidx.h razf.h khash.h
faidx_main.o:faidx.h razf.h
#include <ctype.h>
#include "bam.h"
#include "bam_endian.h"
+#include "kstring.h"
int bam_is_be = 0;
return bam_write1_core(fp, &b->core, b->data_len, b->data);
}
-void bam_view1(const bam_header_t *header, const bam1_t *b)
+char *bam_format1(const bam_header_t *header, const bam1_t *b)
{
uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
int i;
const bam1_core_t *c = &b->core;
- printf("%s\t%d\t", bam1_qname(b), c->flag);
- if (c->tid < 0) printf("*\t");
- else printf("%s\t", header->target_name[c->tid]);
- printf("%d\t%d\t", c->pos + 1, c->qual);
- if (c->n_cigar == 0) putchar('*');
+ kstring_t str;
+ str.l = str.m = 0; str.s = 0;
+
+ ksprintf(&str, "%s\t%d\t", bam1_qname(b), c->flag);
+ if (c->tid < 0) kputs("*\t", &str);
+ else ksprintf(&str, "%s\t", header->target_name[c->tid]);
+ ksprintf(&str, "%d\t%d\t", c->pos + 1, c->qual);
+ if (c->n_cigar == 0) kputc('*', &str);
else {
for (i = 0; i < c->n_cigar; ++i)
- printf("%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]);
+ ksprintf(&str, "%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]);
}
- putchar('\t');
- if (c->mtid < 0) printf("*\t");
- else if (c->mtid == c->tid) printf("=\t");
- else printf("%s\t", header->target_name[c->mtid]);
- printf("%d\t%d\t", c->mpos + 1, c->isize);
- for (i = 0; i < c->l_qseq; ++i) putchar(bam_nt16_rev_table[bam1_seqi(s, i)]);
- putchar('\t');
- if (t[0] == 0xff) putchar('*');
- else for (i = 0; i < c->l_qseq; ++i) putchar(t[i] + 33);
+ kputc('\t', &str);
+ if (c->mtid < 0) kputs("*\t", &str);
+ else if (c->mtid == c->tid) kputs("=\t", &str);
+ else ksprintf(&str, "%s\t", header->target_name[c->mtid]);
+ ksprintf(&str, "%d\t%d\t", c->mpos + 1, c->isize);
+ for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
+ kputc('\t', &str);
+ if (t[0] == 0xff) kputc('*', &str);
+ else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
s = bam1_aux(b);
while (s < b->data + b->data_len) {
uint8_t type, key[2];
key[0] = s[0]; key[1] = s[1];
s += 2; type = *s; ++s;
- printf("\t%c%c:", key[0], key[1]);
- if (type == 'A') { printf("A:%c", *s); ++s; }
- else if (type == 'C') { printf("i:%u", *s); ++s; }
- else if (type == 'c') { printf("i:%d", *s); ++s; }
- else if (type == 'S') { printf("i:%u", *(uint16_t*)s); s += 2; }
- else if (type == 's') { printf("i:%d", *(int16_t*)s); s += 2; }
- else if (type == 'I') { printf("i:%u", *(uint32_t*)s); s += 4; }
- else if (type == 'i') { printf("i:%d", *(int32_t*)s); s += 4; }
- else if (type == 'f') { printf("f:%g", *(float*)s); s += 4; }
- else if (type == 'd') { printf("d:%lg", *(double*)s); s += 8; }
- else if (type == 'Z' || type == 'H') { printf("%c:", type); while (*s) putchar(*s++); ++s; }
+ ksprintf(&str, "\t%c%c:", key[0], key[1]);
+ if (type == 'A') { ksprintf(&str, "A:%c", *s); ++s; }
+ else if (type == 'C') { ksprintf(&str, "i:%u", *s); ++s; }
+ else if (type == 'c') { ksprintf(&str, "i:%d", *s); ++s; }
+ else if (type == 'S') { ksprintf(&str, "i:%u", *(uint16_t*)s); s += 2; }
+ else if (type == 's') { ksprintf(&str, "i:%d", *(int16_t*)s); s += 2; }
+ else if (type == 'I') { ksprintf(&str, "i:%u", *(uint32_t*)s); s += 4; }
+ else if (type == 'i') { ksprintf(&str, "i:%d", *(int32_t*)s); s += 4; }
+ else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
+ else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
+ else if (type == 'Z' || type == 'H') { ksprintf(&str, "%c:", type); while (*s) kputc(*s++, &str); ++s; }
}
- putchar('\n');
+ return str.s;
+}
+
+void bam_view1(const bam_header_t *header, const bam1_t *b)
+{
+ char *s = bam_format1(header, b);
+ printf("%s\n", s);
}
} while (0)
/*!
- @abstract Print an alignment to the standard output in TAM format.
+ @abstract Format a BAM record in the SAM format
@param header pointer to the header structure
@param b alignment to print
+ @return a pointer to the SAM string
*/
- void bam_view1(const bam_header_t *header, const bam1_t *b);
+ char *bam_format1(const bam_header_t *header, const bam1_t *b);
/*!
@abstract Merge multiple sorted BAM.
}
int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
{
- int ret, doff, doff0, dret;
+ int ret, doff, doff0, dret, z = 0;
bam1_core_t *c = &b->core;
kstring_t *str = fp->str;
kstream_t *ks = fp->ks;
while ((ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret)) >= 0 && str->s[0] == '@') { // skip header
+ z += str->l + 1;
str->s[str->l] = dret; // note that str->s is NOT null terminated!!
append_text(header, str);
if (dret != '\n') {
ret = ks_getuntil(fp->ks, '\n', str, &dret);
+ z += str->l + 1;
str->s[str->l] = '\n'; // NOT null terminated!!
append_text(header, str);
}
++fp->n_lines;
}
- while (ret == 0) ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret); // special consideration for "\r\n"
+ while (ret == 0) { // special consideration for "\r\n" and empty lines
+ ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret);
+ if (ret >= 0) z += str->l + 1;
+ }
if (ret < 0) return -1;
++fp->n_lines;
doff = 0;
doff += c->l_qname;
}
{ // flag, tid, pos, qual
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->flag = atoi(str->s);
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->tid = bam_get_tid(header, str->s);
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->qual = isdigit(str->s[0])? atoi(str->s) : 0;
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->flag = atoi(str->s);
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->tid = bam_get_tid(header, str->s);
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->qual = isdigit(str->s[0])? atoi(str->s) : 0;
if (ret < 0) return -2;
}
{ // cigar
long x;
c->n_cigar = 0;
if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -3;
+ z += str->l + 1;
if (str->s[0] != '*') {
for (s = str->s; *s; ++s) {
if (isalpha(*s)) ++c->n_cigar;
} else c->bin = bam_reg2bin(c->pos, c->pos + 1);
}
{ // mtid, mpos, isize
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->mpos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? atoi(str->s) : 0;
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
+ c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid;
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
+ c->mpos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
+ ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
+ c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? atoi(str->s) : 0;
if (ret < 0) return -4;
}
{ // seq and qual
int i;
uint8_t *p;
if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq
+ z += str->l + 1;
c->l_qseq = strlen(str->s);
if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b)))
parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
for (i = 0; i < c->l_qseq; ++i)
p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);
if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual
+ z += str->l + 1;
if (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))
parse_error(fp->n_lines, "sequence and quality are inconsistent");
p += (c->l_qseq+1)/2;
if (dret != '\n' && dret != '\r') { // aux
while (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) {
uint8_t *s, type, key[2];
+ z += str->l + 1;
if (str->l < 6 || str->s[2] != ':' || str->s[4] != ':')
parse_error(fp->n_lines, "missing colon in auxiliary data");
key[0] = str->s[0]; key[1] = str->s[1];
}
b->l_aux = doff - doff0;
b->data_len = doff;
- return 0;
+ return z;
}
tamFile sam_open(const char *fn)
free(fp);
}
}
-
-static void taf2baf_core(const char *fntaf, const char *fnbaf, bam_header_t *header)
-{
- bamFile fpbaf;
- bam1_t *b;
- tamFile fp;
- int ret;
-
- b = (bam1_t*)calloc(1, sizeof(bam1_t));
- fpbaf = (strcmp(fnbaf, "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(fnbaf, "w");
- fp = sam_open(fntaf);
- ret = sam_read1(fp, header, b);
- bam_header_write(fpbaf, header);
- if (ret >= 0) {
- bam_write1(fpbaf, b);
- while (sam_read1(fp, header, b) >= 0) bam_write1(fpbaf, b);
- }
- bam_close(fpbaf);
- free(b->data); free(b);
- sam_close(fp);
-}
-
-int bam_taf2baf(int argc, char *argv[])
-{
- int c;
- bam_header_t *header;
-
- while ((c = getopt(argc, argv, "")) >= 0) {
- }
- if (optind + 3 > argc) {
- fprintf(stderr, "Usage: bamtk import <in.ref_list> <in.sam> <out.bam>\n");
- return 1;
- }
- header = sam_header_read2(argv[optind]);
- taf2baf_core(argv[optind+1], argv[optind+2], header);
- bam_header_destroy(header);
- return 0;
-}
#include "bam.h"
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.3-10 (r257)"
+#define PACKAGE_VERSION "0.1.3-11 (r258)"
#endif
int bam_taf2baf(int argc, char *argv[]);
int bam_flagstat(int argc, char *argv[]);
int bam_fillmd(int argc, char *argv[]);
+int main_samview(int argc, char *argv[]);
+int main_import(int argc, char *argv[]);
+
int faidx_main(int argc, char *argv[]);
int glf3_view_main(int argc, char *argv[]);
-static int view_aux(const bam1_t *b, void *data)
-{
- bam_view1((bam_header_t*)data, b);
- return 0;
-}
-static int view_auxb(const bam1_t *b, void *data)
-{
- bam_write1((bamFile)data, b);
- return 0;
-}
-
-int bam_view(int argc, char *argv[])
-{
- bamFile fp, fpout = 0;
- bam_header_t *header;
- bam1_t *b;
- int ret, c, is_bam = 0, is_header = 0, is_headeronly = 0;
- while ((c = getopt(argc, argv, "bhH")) >= 0) {
- switch (c) {
- case 'b': is_bam = 1; break;
- case 'h': is_header = 1; break;
- case 'H': is_headeronly = 1; break;
- default: fprintf(stderr, "Unrecognized option: -%c\n", c); return 1;
- }
- }
- if (argc == optind) {
- fprintf(stderr, "Usage: samtools view [-bhH] <in.bam> [<region> [...]]\n");
- return 1;
- }
- fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
- assert(fp);
- header = bam_header_read(fp);
- if (header == 0) {
- fprintf(stderr, "[bam_view] fail to read the BAM header. Abort!\n");
- return 1;
- }
- if (is_bam) {
- assert(fpout = bam_dopen(fileno(stdout), "w"));
- bam_header_write(fpout, header);
- }
- if (is_header || is_headeronly) {
- int i, c;
- c = header->text[header->l_text-1];
- header->text[header->l_text-1] = 0;
- printf("%s", header->text);
- if (c) putchar(c);
- header->text[header->l_text-1] = c;
- for (i = 0; i < header->n_targets; ++i)
- printf("@SQ\tSN:%s\tLN:%d\n", header->target_name[i], header->target_len[i]);
- if (is_headeronly) {
- bam_header_destroy(header);
- bam_close(fp);
- return 0;
- }
- }
- if (optind + 1 == argc) {
- b = (bam1_t*)calloc(1, sizeof(bam1_t));
- while ((ret = bam_read1(fp, b)) >= 0) bam_view1(header, b);
- if (ret < -1) fprintf(stderr, "[bam_view] truncated file? Continue anyway. (%d)\n", ret);
- free(b->data); free(b);
- } else {
- int i;
- bam_index_t *idx;
- idx = bam_index_load(argv[optind]);
- for (i = optind + 1; i < argc; ++i) {
- int tid, beg, end;
- bam_parse_region(header, argv[i], &tid, &beg, &end);
- if (tid < 0) {
- fprintf(stderr, "[bam_view] fail to get the reference name. Abort!\n");
- return 1;
- }
- if (is_bam) bam_fetch(fp, idx, tid, beg, end, fpout, view_auxb);
- else bam_fetch(fp, idx, tid, beg, end, header, view_aux);
- }
- bam_index_destroy(idx);
- }
- bam_header_destroy(header);
- bam_close(fp);
- if (is_bam) bam_close(fpout);
- return 0;
-}
-
int bam_tagview(int argc, char *argv[])
{
bamFile fp;
fprintf(stderr, "Program: samtools (Tools for alignments in the SAM format)\n");
fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
fprintf(stderr, "Usage: samtools <command> [options]\n\n");
- fprintf(stderr, "Command: import import from the text format\n");
+ fprintf(stderr, "Command: import import from SAM (obsolete; use `view')\n");
fprintf(stderr, " view export to the text format\n");
fprintf(stderr, " sort sort alignment file\n");
fprintf(stderr, " merge merge multiple sorted alignment files\n");
int main(int argc, char *argv[])
{
if (argc < 2) return usage();
- if (strcmp(argv[1], "view") == 0) return bam_view(argc-1, argv+1);
- else if (strcmp(argv[1], "import") == 0) return bam_taf2baf(argc-1, argv+1);
+ if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);
+ else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);
else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1);
else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);
else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);
--- /dev/null
+#include <string.h>
+#include "sam.h"
+
+#define TYPE_BAM 1
+#define TYPE_READ 2
+
+bam_header_t *bam_header_dup(const bam_header_t *h0)
+{
+ bam_header_t *h;
+ int i;
+ h = bam_header_init();
+ *h = *h0;
+ h->hash = 0;
+ h->text = (char*)calloc(h->l_text + 1, 1);
+ memcpy(h->text, h0->text, h->l_text);
+ h->target_len = (uint32_t*)calloc(h->n_targets, 4);
+ h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
+ for (i = 0; i < h->n_targets; ++i) {
+ h->target_len[i] = h0->target_len[i];
+ h->target_name[i] = strdup(h0->target_name[i]);
+ }
+ return h;
+}
+
+bam_header_t *bam_header_parse(const char *text)
+{
+ bam_header_t *h;
+ int i;
+ char *s, *p, *q, *r;
+
+ i = strlen(text);
+ if (i < 3) return 0; // headerless
+ h = bam_header_init();
+ h->l_text = i;
+ h->text = strdup(text);
+ s = h->text;
+ while ((s = strstr(s, "@SQ")) != 0) {
+ ++h->n_targets;
+ s += 3;
+ }
+ h->target_len = (uint32_t*)calloc(h->n_targets, 4);
+ h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
+ i = 0;
+ s = h->text;
+ while ((s = strstr(s, "@SQ")) != 0) {
+ s += 3;
+ r = s;
+ if ((p = strstr(s, "SN:")) != 0) {
+ q = p + 3;
+ for (p = q; *p && *p != '\t'; ++p);
+ h->target_name[i] = (char*)calloc(p - q + 1, 1);
+ strncpy(h->target_name[i], q, p - q);
+ } else goto header_err_ret;
+ if (r < p) r = p;
+ if ((p = strstr(s, "LN:")) != 0) h->target_len[i] = strtol(p + 3, 0, 10);
+ else goto header_err_ret;
+ if (r < p) r = p;
+ s = r + 3;
+ ++i;
+ }
+ if (h->n_targets == 0) {
+ bam_header_destroy(h);
+ return 0;
+ } else return h;
+
+header_err_ret:
+ fprintf(stderr, "[bam_header_parse] missing SN tag in a @SQ line.\n");
+ bam_header_destroy(h);
+ return 0;
+}
+
+samfile_t *samopen(const char *fn, const char *mode, const void *aux)
+{
+ samfile_t *fp;
+ fp = (samfile_t*)calloc(1, sizeof(samfile_t));
+ if (mode[0] == 'r') {
+ const char *fn_list = (const char*)aux;
+ fp->type |= TYPE_READ;
+ if (mode[1] == 'b') { // binary
+ fp->type |= TYPE_BAM;
+ fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
+ fp->header = bam_header_read(fp->x.bam);
+ } else {
+ fp->x.tamr = sam_open(fn);
+ fp->header = sam_header_read2(fn_list);
+ }
+ } else if (mode[0] == 'w') {
+ fp->header = bam_header_dup((const bam_header_t*)aux);
+ if (mode[1] == 'b') { // binary
+ fp->type |= TYPE_BAM;
+ fp->x.bam = strcmp(fn, "-")? bam_open(fn, "w") : bam_dopen(fileno(stdout), "w");
+ bam_header_write(fp->x.bam, fp->header);
+ } else {
+ int i;
+ bam_header_t *alt = 0;
+ alt = bam_header_parse(fp->header->text);
+ fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
+ if (alt) {
+ if (alt->n_targets != fp->header->n_targets)
+ fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
+ bam_header_destroy(alt);
+ fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
+ }
+ if (strstr(mode, "h")) // print header
+ for (i = 0; i < fp->header->n_targets; ++i)
+ fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
+ }
+ }
+ return fp;
+}
+
+void samclose(samfile_t *fp)
+{
+ if (fp == 0) return;
+ if (fp->header) bam_header_destroy(fp->header);
+ if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
+ else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
+ else fclose(fp->x.tamw);
+ free(fp);
+}
+
+int samread(samfile_t *fp, bam1_t *b)
+{
+ if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
+ if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
+ else return sam_read1(fp->x.tamr, fp->header, b);
+}
+
+int samwrite(samfile_t *fp, const bam1_t *b)
+{
+ if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
+ if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
+ else {
+ char *s = bam_format1(fp->header, b);
+ int l = strlen(s);
+ fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
+ free(s);
+ return l + 1;
+ }
+}
--- /dev/null
+#ifndef BAM_SAM_H
+#define BAM_SAM_H
+
+#include "bam.h"
+
+typedef struct {
+ int type;
+ union {
+ tamFile tamr;
+ bamFile bam;
+ FILE *tamw;
+ } x;
+ bam_header_t *header;
+} samfile_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ // mode can be: r/w/rb/wb. On writing, aux points to bam_header_t; on reading, aux points to the name of fn_list for SAM
+ samfile_t *samopen(const char *fn, const char *mode, const void *aux);
+ void samclose(samfile_t *fp);
+ int samread(samfile_t *fp, bam1_t *b);
+ int samwrite(samfile_t *fp, const bam1_t *b);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "sam.h"
+
+static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
+
+// callback function for bam_fetch()
+static int view_func(const bam1_t *b, void *data)
+{
+ if (b->core.qual < g_min_mapQ) return 0;
+ if (g_flag_on && (b->core.flag & g_flag_on) == 0) return 0;
+ if (b->core.flag & g_flag_off) return 0;
+ samwrite((samfile_t*)data, b);
+ return 0;
+}
+
+static int usage(void);
+
+int main_samview(int argc, char *argv[])
+{
+ int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0;
+ samfile_t *in, *out;
+ char in_mode[4], out_mode[4], *fn_out = 0, *fn_list = 0;
+
+ /* parse command-line options */
+ strcpy(in_mode, "r"); strcpy(out_mode, "w");
+ while ((c = getopt(argc, argv, "bt:hHo:q:f:F:")) >= 0) {
+ switch (c) {
+ case 'b': strcat(out_mode, "b"); break;
+ case 't': fn_list = strdup(optarg); is_bamin = 0; break;
+ case 'h': is_header = 1; break;
+ case 'H': is_header_only = 1; break;
+ case 'o': fn_out = strdup(optarg); break;
+ case 'f': g_flag_on = strtol(optarg, 0, 0); break;
+ case 'F': g_flag_on = strtol(optarg, 0, 0); break;
+ case 'q': g_min_mapQ = atoi(optarg); break;
+ default: return usage();
+ }
+ }
+ if (is_header_only) is_header = 1;
+ if (is_bamin) strcat(in_mode, "b");
+ if (is_header) strcat(out_mode, "h");
+ if (argc == optind) return usage();
+
+ // open file handlers
+ in = samopen(argv[optind], in_mode, fn_list);
+ out = samopen(fn_out? fn_out : "-", out_mode, in->header);
+ if (is_header_only) goto view_end; // no need to print alignments
+
+ if (argc == optind + 1) { // convert/print the entire file
+ bam1_t *b = bam_init1();
+ int r;
+ while ((r = samread(in, b)) >= 0) // read one alignment from `in'
+ samwrite(out, b); // write the alignment to `out'
+ if (r < -1) fprintf(stderr, "[main_samview] truncated file.\n");
+ bam_destroy1(b);
+ } else { // retrieve alignments in specified regions
+ int i;
+ bam_index_t *idx = 0;
+ if (is_bamin) idx = bam_index_load(argv[optind]); // load BAM index
+ if (idx == 0) { // index is unavailable
+ fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM files.\n");
+ ret = 1;
+ goto view_end;
+ }
+ for (i = optind + 1; i < argc; ++i) {
+ int tid, beg, end;
+ bam_parse_region(in->header, argv[i], &tid, &beg, &end); // parse a region in the format like `chr2:100-200'
+ if (tid < 0) { // reference name is not found
+ fprintf(stderr, "[main_samview] fail to get the reference name. Continue anyway.\n");
+ continue;
+ }
+ bam_fetch(in->x.bam, idx, tid, beg, end, out, view_func); // fetch alignments
+ }
+ bam_index_destroy(idx); // destroy the BAM index
+ }
+
+view_end:
+ // close files, free and return
+ free(fn_list); free(fn_out);
+ samclose(in);
+ samclose(out);
+ return ret;
+}
+
+static int usage()
+{
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Usage: samtools view [options] <in.bam>|<in.sam> [region1 [...]]\n\n");
+ fprintf(stderr, "Options: -b output BAM\n");
+ fprintf(stderr, " -h print header for the SAM output\n");
+ fprintf(stderr, " -H print header only (no alignments)\n");
+ fprintf(stderr, " -t FILE list of reference names and lengths, assuming SAM input [null]\n");
+ fprintf(stderr, " -o FILE output file name [stdout]\n");
+ fprintf(stderr, " -f INT required flag, 0 for unset [0]\n");
+ fprintf(stderr, " -F INT filtering flag, 0 for unset [0]\n");
+ fprintf(stderr, " -q INT minimum mapping quality [0]\n");
+ fprintf(stderr, "\n\
+Notes:\n\
+\n\
+ 1. By default, this command assumes the file on the command line is in\n\
+ the BAM format and it prints the alignments in SAM. If `-t' is\n\
+ applied, the input file is assumed to be in the SAM format. The\n\
+ file supplied with `-t' is SPACE/TAB delimited with the first two\n\
+ fields of each line consisting of the reference name and the\n\
+ corresponding sequence length. The `.fai' file generated by `faidx'\n\
+ can be used here. This file may be empty if reads are unaligned.\n\
+\n\
+ 2. SAM->BAM conversion: `samtools view -bt ref.fa.fai in.sam.gz'.\n\
+\n\
+ 3. BAM->SAM conversion: `samtools view in.bam'.\n\
+\n\
+ 4. A region should be presented in one of the following formats:\n\
+ `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n\
+ specified, the input alignment file must be an indexed BAM file.\n\
+\n");
+ return 1;
+}
+
+int main_import(int argc, char *argv[])
+{
+ int argc2, ret;
+ char **argv2;
+ if (argc != 4) {
+ fprintf(stderr, "Usage: bamtk import <in.ref_list> <in.sam> <out.bam>\n");
+ return 1;
+ }
+ argc2 = 6;
+ argv2 = calloc(6, sizeof(char*));
+ argv2[0] = "import", argv2[1] = "-o", argv2[2] = argv[3], argv2[3] = "-bt", argv2[4] = argv[1], argv2[5] = argv[2];
+ ret = main_samview(argc2, argv2);
+ free(argv2);
+ return ret;
+}