#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
+#include "kstring.h"
#include "bam.h"
#include "kseq.h"
#include "khash.h"
header->text[header->l_text] = 0;
}
+int sam_header_parse_rg(bam_header_t *h)
+{
+ kstring_t *rgid, *rglib;
+ char *p, *q, *s, *r;
+ int n = 0;
+
+ // free
+ if (h == 0) return 0;
+ bam_strmap_destroy(h->rg2lib); h->rg2lib = 0;
+ if (h->l_text < 3) return 0;
+ // parse @RG lines
+ h->rg2lib = bam_strmap_init();
+ rgid = calloc(1, sizeof(kstring_t));
+ rglib = calloc(1, sizeof(kstring_t));
+ s = h->text;
+ while ((s = strstr(s, "@RG")) != 0) {
+ if (rgid->l && rglib->l) {
+ bam_strmap_put(h->rg2lib, rgid->s, rglib->s);
+ ++n;
+ }
+ rgid->l = rglib->l = 0;
+ s += 3;
+ r = s;
+ if ((p = strstr(s, "ID:")) != 0) {
+ q = p + 3;
+ for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p);
+ kputsn(q, p - q, rgid);
+ } else {
+ fprintf(stderr, "[bam_header_parse] missing ID tag in @RG lines.\n");
+ break;
+ }
+ if (r < p) r = p;
+ if ((p = strstr(s, "LB:")) != 0) {
+ q = p + 3;
+ for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p);
+ kputsn(q, p - q, rglib);
+ } else {
+ fprintf(stderr, "[bam_header_parse] missing LB tag in @RG lines.\n");
+ break;
+ }
+ if (r < p) r = p;
+ s = r + 3;
+ }
+ if (rgid->l && rglib->l) {
+ bam_strmap_put(h->rg2lib, rgid->s, rglib->s);
+ ++n;
+ }
+ free(rgid->s); free(rgid);
+ free(rglib->s); free(rglib);
+ if (n == 0) {
+ bam_strmap_destroy(h->rg2lib);
+ h->rg2lib = 0;
+ }
+ return n;
+}
+
int sam_header_parse(bam_header_t *h)
{
int i;
s = r + 3;
++i;
}
+ sam_header_parse_rg(h);
return h->n_targets;
header_err_ret:
{ // flag, tid, pos, qual
ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->flag = atoi(str->s);
ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->tid = bam_get_tid(header, str->s);
- if (c->tid < 0 && strcmp(str->s, "*"))
- fprintf(stderr, "[sam_read1] reference '%s' is recognized as '*'.\n", str->s);
+ if (c->tid < 0 && strcmp(str->s, "*")) {
+ if (header->n_targets == 0) {
+ fprintf(stderr, "[sam_read1] missing header? Abort!\n");
+ exit(1);
+ } else fprintf(stderr, "[sam_read1] reference '%s' is recognized as '*'.\n", str->s);
+ }
ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->qual = isdigit(str->s[0])? atoi(str->s) : 0;
if (ret < 0) return -2;
if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b)))
parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;
- bzero(p, (c->l_qseq+1)/2);
+ memset(p, 0, (c->l_qseq+1)/2);
for (i = 0; i < c->l_qseq; ++i)
p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);
if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual