From 6f6733c7551a2f4697c46acb4a516fa04bba3e40 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 24 Jul 2009 11:34:30 +0000 Subject: [PATCH] * samtools-0.1.5-13 (r416) * support import/export SAM with string tags --- bam.c | 16 ++++++++++++---- bam.h | 6 +++++- bam_import.c | 28 +++++++++++++++++++++++++++- bamtk.c | 2 +- sam.c | 7 ++++--- sam.h | 13 +++++++------ sam_view.c | 14 ++++++++++---- 7 files changed, 66 insertions(+), 20 deletions(-) diff --git a/bam.c b/bam.c index 33907f1..2edb2a8 100644 --- a/bam.c +++ b/bam.c @@ -6,6 +6,7 @@ #include "kstring.h" int bam_is_be = 0; +char *bam_flag2char_table = "pPuUrR12sfd\0\0\0\0\0"; /************************** * CIGAR related routines * @@ -236,7 +237,7 @@ int bam_write1(bamFile fp, const bam1_t *b) return bam_write1_core(fp, &b->core, b->data_len, b->data); } -char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int is_hex) +char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of) { uint8_t *s = bam1_seq(b), *t = bam1_qual(b); int i; @@ -244,8 +245,15 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int is_hex) kstring_t str; str.l = str.m = 0; str.s = 0; - if (is_hex) ksprintf(&str, "%s\t0x%x\t", bam1_qname(b), c->flag); - else ksprintf(&str, "%s\t%d\t", bam1_qname(b), c->flag); + ksprintf(&str, "%s\t", bam1_qname(b)); + if (of == BAM_OFDEC) ksprintf(&str, "%d\t", c->flag); + else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag); + else { // BAM_OFSTR + for (i = 0; i < 16; ++i) + if ((c->flag & 1<<i) && bam_flag2char_table[i]) + kputc(bam_flag2char_table[i], &str); + kputc('\t', &str); + } if (c->tid < 0) kputs("*\t", &str); else ksprintf(&str, "%s\t", header->target_name[c->tid]); ksprintf(&str, "%d\t%d\t", c->pos + 1, c->qual); @@ -285,7 +293,7 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int is_hex) char *bam_format1(const bam_header_t *header, const bam1_t *b) { - return bam_format1_core(header, b, 0); + return bam_format1_core(header, b, BAM_OFDEC); } void bam_view1(const bam_header_t *header, const bam1_t *b) diff --git 
a/bam.h b/bam.h index a33006d..ec983df 100644 --- a/bam.h +++ b/bam.h @@ -113,6 +113,10 @@ typedef struct { /*! @abstract optical or PCR duplicate */ #define BAM_FDUP 1024 +#define BAM_OFDEC 0 +#define BAM_OFHEX 1 +#define BAM_OFSTR 2 + /*! @abstract defautl mask for pileup */ #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) @@ -431,7 +435,7 @@ extern "C" { */ char *bam_format1(const bam_header_t *header, const bam1_t *b); - char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int is_hex); + char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of); /*! @typedef @abstract Structure for one alignment covering the pileup position. diff --git a/bam_import.c b/bam_import.c index 2cadfcc..81d3720 100644 --- a/bam_import.c +++ b/bam_import.c @@ -36,6 +36,25 @@ unsigned char bam_nt16_table[256] = { 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15 }; +unsigned short bam_char2flag_table[256] = { + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,BAM_FREAD1,BAM_FREAD2,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + BAM_FPROPER_PAIR,0,BAM_FMREVERSE,0, 0,BAM_FMUNMAP,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, BAM_FDUP,0,BAM_FQCFAIL,0, 0,0,0,0, 0,0,0,0, + BAM_FPAIRED,0,BAM_FREVERSE,BAM_FSECONDARY, 0,BAM_FUNMAP,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 +}; + char *bam_nt16_rev_table = "=ACMGRSVTWYHKDBN"; struct __tamFile_t { @@ -296,12 +315,19 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) memcpy(alloc_data(b, doff + c->l_qname) + doff, str->s, c->l_qname); doff += c->l_qname; } - { // flag, tid, pos, qual + { // flag long flag; char *s; ret = 
ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; flag = strtol((char*)str->s, &s, 0); + if (*s) { // not the end of the string + flag = 0; + for (s = str->s; *s; ++s) + flag |= bam_char2flag_table[(int)*s]; + } c->flag = flag; + } + { // tid, pos, qual ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->tid = bam_get_tid(header, str->s); if (c->tid < 0 && strcmp(str->s, "*")) { if (header->n_targets == 0) { diff --git a/bamtk.c b/bamtk.c index 5df1407..60f6204 100644 --- a/bamtk.c +++ b/bamtk.c @@ -4,7 +4,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.5-12 (r415)" +#define PACKAGE_VERSION "0.1.5-13 (r416)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/sam.c b/sam.c index a3f2998..e8e742c 100644 --- a/sam.c +++ b/sam.c @@ -3,7 +3,6 @@ #define TYPE_BAM 1 #define TYPE_READ 2 -#define TYPE_HEX 4 bam_header_t *bam_header_dup(const bam_header_t *h0) { @@ -76,7 +75,9 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux) // open file fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout; if (fp->x.tamr == 0) goto open_err_ret; - if (strstr(mode, "x")) fp->type |= TYPE_HEX; + if (strstr(mode, "X")) fp->type |= BAM_OFSTR<<2; + else if (strstr(mode, "x")) fp->type |= BAM_OFHEX<<2; + else fp->type |= BAM_OFDEC<<2; // write header if (strstr(mode, "h")) { int i; @@ -128,7 +129,7 @@ int samwrite(samfile_t *fp, const bam1_t *b) if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b); else { - char *s = bam_format1_core(fp->header, b, fp->type & TYPE_HEX); + char *s = bam_format1_core(fp->header, b, fp->type>>2&3); int l = strlen(s); fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw); free(s); diff --git a/sam.h b/sam.h index b8a0064..f06439b 100644 --- a/sam.h +++ b/sam.h @@ -15,7 +15,7 @@ /*! 
@typedef @abstract SAM/BAM file handler - @field type type of the handler; bit 1 for BAM, 2 for reading and bit 3 for is_hex + @field type type of the handler; bit 1 for BAM, 2 for reading and bits 3-4 for flag format @field bam BAM file handler; valid if (type&1) == 1 @field tamr SAM file handler for reading; valid if type == 2 @field tamw SAM file handler for writing; valid if type == 0 @@ -41,11 +41,12 @@ extern "C" { @param fn SAM/BAM file name; "-" is recognized as stdin (for reading) or stdout (for writing). - @param mode open mode /[rw](b?)(u?)(h?)(x?)/: 'r' for reading, 'w' - for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output, 'h' - for outputing header in SAM and 'x' for HEX flag. If 'b' present, - it must immediately follow 'r' or 'w'. Valid modes are "r", "w", - "wh", "wx", "whx", "rb", "wb" and "wbu" exclusively. + @param mode open mode /[rw](b?)(u?)(h?)([xX]?)/: 'r' for reading, + 'w' for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output, + 'h' for outputting header in SAM, 'x' for HEX flag and 'X' for + string flag. If 'b' present, it must immediately follow 'r' or + 'w'. Valid modes are "r", "w", "wh", "wx", "whx", "wX", "whX", + "rb", "wb" and "wbu" exclusively. 
@param aux auxiliary data; if mode[0]=='w', aux points to bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM diff --git a/sam_view.c b/sam_view.c index f253dbf..9af2194 100644 --- a/sam_view.c +++ b/sam_view.c @@ -35,13 +35,14 @@ static int usage(void); int main_samview(int argc, char *argv[]) { - int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0, is_hex = 0; + int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0; + int of_type = BAM_OFDEC; samfile_t *in = 0, *out = 0; char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0; /* parse command-line options */ strcpy(in_mode, "r"); strcpy(out_mode, "w"); - while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:x")) >= 0) { + while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX")) >= 0) { switch (c) { case 'S': is_bamin = 0; break; case 'b': is_bamout = 1; break; @@ -55,17 +56,21 @@ int main_samview(int argc, char *argv[]) case 'u': is_uncompressed = 1; break; case 'l': g_library = strdup(optarg); break; case 'r': g_rg = strdup(optarg); break; - case 'x': is_hex = 1; break; + case 'x': of_type = BAM_OFHEX; break; + case 'X': of_type = BAM_OFSTR; break; default: return usage(); } } if (is_uncompressed) is_bamout = 1; if (is_header_only) is_header = 1; if (is_bamout) strcat(out_mode, "b"); + else { + if (of_type == BAM_OFHEX) strcat(out_mode, "x"); + else if (of_type == BAM_OFSTR) strcat(out_mode, "X"); + } if (is_bamin) strcat(in_mode, "b"); if (is_header) strcat(out_mode, "h"); if (is_uncompressed) strcat(out_mode, "u"); - if (is_hex && !is_bamout) strcat(out_mode, "x"); if (argc == optind) return usage(); // open file handlers @@ -126,6 +131,7 @@ static int usage() fprintf(stderr, " -S input is SAM\n"); fprintf(stderr, " -u uncompressed BAM output (force -b)\n"); fprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\n"); + fprintf(stderr, " -X output FLAG in stirng (samtools-C specific)\n"); 
fprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\n"); fprintf(stderr, " -o FILE output file name [stdout]\n"); fprintf(stderr, " -f INT required flag, 0 for unset [0]\n"); -- 2.39.2