* parse the @RG header lines and allow to choose library at the "samtools view"
command line
}
free(header->text);
#ifndef BAM_NO_HASH
+ if (header->rg2lib) bam_strmap_destroy(header->rg2lib);
bam_destroy_header_hash(header);
#endif
free(header);
int32_t n_targets;
char **target_name;
uint32_t *target_len;
- void *hash;
+ void *hash, *rg2lib;
int l_text;
char *text;
} bam_header_t;
bam_header_t *sam_header_read(tamFile fp);
int sam_header_parse(bam_header_t *h);
+ int sam_header_parse_rg(bam_header_t *h);
#define sam_write1(header, b) bam_view1(header, b)
+ int bam_strmap_put(void *strmap, const char *rg, const char *lib);
+ const char *bam_strmap_get(const void *strmap, const char *rg);
+ void *bam_strmap_dup(const void*);
+ void *bam_strmap_init();
+ void bam_strmap_destroy(void *strmap);
+
/*!
@abstract Initialize a header structure.
@return the pointer to the header structure
void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
// uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
- uint8_t *bam_aux_get(bam1_t *b, const char tag[2]);
+ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
int32_t bam_aux2i(const uint8_t *s);
float bam_aux2f(const uint8_t *s);
double bam_aux2d(const uint8_t *s);
#include <ctype.h>
#include "bam.h"
#include "khash.h"
+typedef char *str_p;
KHASH_MAP_INIT_STR(s, int)
+KHASH_MAP_INIT_STR(r2l, str_p)
void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
{
return bam_aux_get(b, tag);
}
*/
-uint8_t *bam_aux_get(bam1_t *b, const char tag[2])
+uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
{
uint8_t *s;
int y = tag[0]<<8 | tag[1];
else return 0;
}
+/******************
+ * rg2lib related *
+ ******************/
+
+int bam_strmap_put(void *rg2lib, const char *rg, const char *lib)
+{
+ int ret;
+ khint_t k;
+ khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
+ char *key = strdup(rg);
+ k = kh_put(r2l, h, key, &ret);
+ if (ret) kh_val(h, k) = strdup(lib);
+ else {
+ fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg);
+ free(key);
+ }
+ return 0;
+}
+
+const char *bam_strmap_get(const void *rg2lib, const char *rg)
+{
+ const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
+ khint_t k;
+ k = kh_get(r2l, h, rg);
+ if (k != kh_end(h)) return (const char*)kh_val(h, k);
+ else return 0;
+}
+
+void *bam_strmap_dup(const void *rg2lib)
+{
+ const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
+ khash_t(r2l) *g = kh_init(r2l);
+ khint_t k, l;
+ int ret;
+ for (k = kh_begin(h); k < kh_end(h); ++k) {
+ if (kh_exist(h, k)) {
+ char *key = strdup(kh_key(h, k));
+ l = kh_put(r2l, g, key, &ret);
+ kh_val(g, l) = strdup(kh_val(h, k));
+ }
+ }
+ return g;
+}
+
+void *bam_strmap_init()
+{
+ return (void*)kh_init(r2l);
+}
+
+void bam_strmap_destroy(void *rg2lib)
+{
+ khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
+ khint_t k;
+ if (h == 0) return;
+ for (k = kh_begin(h); k < kh_end(h); ++k) {
+ if (kh_exist(h, k)) {
+ free((char*)kh_key(h, k)); free(kh_val(h, k));
+ }
+ }
+ kh_destroy(r2l, h);
+}
+
+/*** The following routines were implemented by Nils Homer for color-space support in tview ***/
+
char bam_aux_getCSi(bam1_t *b, int i)
{
uint8_t *c = bam_aux_get(b, "CS");
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
+#include "kstring.h"
#include "bam.h"
#include "kseq.h"
#include "khash.h"
header->text[header->l_text] = 0;
}
+int sam_header_parse_rg(bam_header_t *h)
+{
+ kstring_t *rgid, *rglib;
+ char *p, *q, *s, *r;
+ int n = 0;
+
+ bam_strmap_destroy(h->rg2lib);
+ // parse @RG lines
+ h->rg2lib = bam_strmap_init();
+ rgid = calloc(1, sizeof(kstring_t));
+ rglib = calloc(1, sizeof(kstring_t));
+ s = h->text;
+ while ((s = strstr(s, "@RG")) != 0) {
+ if (rgid->l && rglib->l) {
+ bam_strmap_put(h->rg2lib, rgid->s, rglib->s);
+ ++n;
+ }
+ rgid->l = rglib->l = 0;
+ s += 3;
+ r = s;
+ if ((p = strstr(s, "ID:")) != 0) {
+ q = p + 3;
+ for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p);
+ kputsn(q, p - q, rgid);
+ } else {
+ fprintf(stderr, "[bam_header_parse] missing ID tag in @RG lines.\n");
+ break;
+ }
+ if (r < p) r = p;
+ if ((p = strstr(s, "LB:")) != 0) {
+ q = p + 3;
+ for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p);
+ kputsn(q, p - q, rglib);
+ } else {
+ fprintf(stderr, "[bam_header_parse] missing LB tag in @RG lines.\n");
+ break;
+ }
+ if (r < p) r = p;
+ s = r + 3;
+ }
+ if (rgid->l && rglib->l) {
+ bam_strmap_put(h->rg2lib, rgid->s, rglib->s);
+ ++n;
+ }
+ free(rgid->s); free(rgid);
+ free(rglib->s); free(rglib);
+ return n;
+}
+
int sam_header_parse(bam_header_t *h)
{
int i;
s = r + 3;
++i;
}
+ sam_header_parse_rg(h);
return h->n_targets;
header_err_ret:
#include "bam.h"
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.4-7 (r337)"
+#define PACKAGE_VERSION "0.1.4-8 (r338)"
#endif
int bam_taf2baf(int argc, char *argv[]);
int ksprintf(kstring_t *s, const char *fmt, ...);
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
-static inline int kputs(const char *p, kstring_t *s)
+static inline int kputsn(const char *p, int l, kstring_t *s)
{
- int l = strlen(p);
if (s->l + l + 1 >= s->m) {
s->m = s->l + l + 2;
kroundup32(s->m);
s->s = (char*)realloc(s->s, s->m);
}
- strcpy(s->s + s->l, p);
+ strncpy(s->s + s->l, p, l);
s->l += l;
+ s->s[s->l] = 0;
return l;
}
+static inline int kputs(const char *p, kstring_t *s)
+{
+ return kputsn(p, strlen(p), s);
+}
+
static inline int kputc(int c, kstring_t *s)
{
if (s->l + 1 >= s->m) {
h->target_len[i] = h0->target_len[i];
h->target_name[i] = strdup(h0->target_name[i]);
}
+ if (h0->rg2lib) h->rg2lib = bam_strmap_dup(h0->rg2lib);
return h;
}
fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
} else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
}
+ sam_header_parse_rg(fp->header);
} else if (mode[0] == 'w') { // write
fp->header = bam_header_dup((const bam_header_t*)aux);
if (mode[1] == 'b') { // binary
#include "sam.h"
static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
+static char *g_library;
-#define __g_skip_aln(b) (((b)->core.qual < g_min_mapQ) || ((b->core.flag & g_flag_on) != g_flag_on) \
- || (b->core.flag & g_flag_off))
+static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)
+{
+ if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))
+ return 1;
+ if (g_library) {
+ uint8_t *s = bam_aux_get(b, "RG");
+ if (s) {
+ const char *p = bam_strmap_get(h->rg2lib, s + 1);
+ return (p && strcmp(p, g_library) == 0)? 0 : 1;
+ } else return 1;
+ } else return 0;
+}
// callback function for bam_fetch()
static int view_func(const bam1_t *b, void *data)
{
- if (!__g_skip_aln(b)) samwrite((samfile_t*)data, b);
+ if (!__g_skip_aln(((samfile_t*)data)->header, b))
+ samwrite((samfile_t*)data, b);
return 0;
}
/* parse command-line options */
strcpy(in_mode, "r"); strcpy(out_mode, "w");
- while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:u")) >= 0) {
+ while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:")) >= 0) {
switch (c) {
case 'S': is_bamin = 0; break;
case 'b': is_bamout = 1; break;
case 'F': g_flag_off = strtol(optarg, 0, 0); break;
case 'q': g_min_mapQ = atoi(optarg); break;
case 'u': is_uncompressed = 1; break;
+ case 'l': g_library = strdup(optarg); break;
default: return usage();
}
}
bam1_t *b = bam_init1();
int r;
while ((r = samread(in, b)) >= 0) // read one alignment from `in'
- if (!__g_skip_aln(b))
+ if (!__g_skip_aln(in->header, b))
samwrite(out, b); // write the alignment to `out'
if (r < -1) fprintf(stderr, "[main_samview] truncated file.\n");
bam_destroy1(b);
view_end:
// close files, free and return
- free(fn_list); free(fn_out);
+ free(fn_list); free(fn_out); free(g_library);
samclose(in);
samclose(out);
return ret;