bam_header_t *sam_header_read2(const char *fn)
{
bam_header_t *header;
- int c, dret, ret;
+ int c, dret, ret, error = 0; /* error: set to 1 below when a duplicated sequence name is reported */
gzFile fp;
kstream_t *ks;
kstring_t *str;
ks_getuntil(ks, 0, str, &dret);
len = atoi(str->s);
k = kh_put(ref, hash, s, &ret);
+ if (ret == 0) { /* ret is filled in by kh_put() above; 0 is treated here as "name s was already in the hash" */
+ fprintf(stderr, "[sam_header_read2] duplicated sequence name: %s\n", s);
+ error = 1; /* only records the failure; reading continues and the check after the summary message returns 0 */
+ }
kh_value(hash, k) = (uint64_t)len<<32 | i;
if (dret != '\n')
while ((c = ks_getc(ks)) != '\n' && c != -1);
gzclose(fp);
free(str->s); free(str);
fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", kh_size(hash));
+ if (error) return 0; /* NOTE(review): returns before hash2header()/kh_destroy() below, so hash appears not to be released on this path - confirm */
header = hash2header(hash);
kh_destroy(ref, hash);
return header;
header->text[header->l_text] = 0;
}
-int sam_header_parse_rg(bam_header_t *h)
-{
- if (h->dict == 0) h->dict = sam_header_parse2(h->text);
- if (h->rg2lib) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB");
- return sam_tbl_size(h->rg2lib);
-}
-
int sam_header_parse(bam_header_t *h)
{
- void *tbl;
char **tmp;
int i;
free(h->target_len); free(h->target_name);
h->n_targets = 0; h->target_len = 0; h->target_name = 0;
if (h->l_text < 3) return 0;
if (h->dict == 0) h->dict = sam_header_parse2(h->text);
- tbl = sam_header2tbl(h->dict, "SQ", "SN", "LN");
- h->n_targets = sam_tbl_size(tbl);
- if (h->n_targets == 0) {
- sam_tbl_destroy(tbl);
- return 0;
- }
- h->target_len = (uint32_t*)calloc(h->n_targets, 4);
- h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
- tmp = (char**)calloc(h->n_targets, sizeof(void*));
- sam_tbl_pair(tbl, h->target_name, tmp);
+ tmp = sam_header2list(h->dict, "SQ", "SN", &h->n_targets); /* list the "SN" values of the "SQ" records; &h->n_targets is passed to receive the count (presumably one entry per record - confirm) */
+ if (h->n_targets == 0) return 0; /* no targets; NOTE(review): tmp is not freed here - fine only if sam_header2list() returns NULL for an empty list - confirm */
+ h->target_name = calloc(h->n_targets, sizeof(void*)); /* NOTE(review): calloc() result is not checked before the loop writes to it */
+ for (i = 0; i < h->n_targets; ++i)
+ h->target_name[i] = strdup(tmp[i]); /* names are copied, so h owns each string; NOTE(review): free(h->target_name) at the top of this function releases only the array - confirm the strings are freed elsewhere */
+ free(tmp); /* frees the list array only, not the tmp[i] strings it pointed to */
+ tmp = sam_header2list(h->dict, "SQ", "LN", &h->n_targets); /* NOTE(review): overwrites h->n_targets; if the "LN" count differs from the "SN" count, target_name and target_len would disagree - confirm */
+ h->target_len = calloc(h->n_targets, 4); /* 4 bytes per element; the replaced line cast this allocation to uint32_t* */
for (i = 0; i < h->n_targets; ++i)
h->target_len[i] = atoi(tmp[i]);
free(tmp);
- sam_tbl_destroy(tbl);
return h->n_targets;
}