X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sam_header.c;h=88b6a1c856bbf4e6b073c7c8d6e394c1a305094e;hb=28391e5898804ce6b805016d8c676fdf61442eb3;hp=05d75deb2c4ced014c6ae5f8612260d05b34660a;hpb=bef0453b4f1fefd6de0fad72abbcbbc379140a23;p=samtools.git

Review notes (everything above the first "diff --git" line is commentary, not patch content):
  * Line breaks and indentation were rebuilt from a flattened copy; treat whitespace,
    including the placement of blank context lines, as approximate.
  * sam_header2key_val: the skip test is "!key || !value" (was "&&"), so a line carrying
    only one of the two tags is skipped instead of being dereferenced through NULL.
  * sam_header2tbl_n: the inner loop between "i" and "value;" was missing from the
    flattened copy; its body is restored to fit the hunk's line count and should be
    checked against blob 88b6a1c.
  * islower() arguments are cast to unsigned char, since plain char may be negative.
  * Doc comments were added to the two new functions and the last two hunk headers were
    recounted for them, so the result no longer reproduces blob 88b6a1c byte for byte.

diff --git a/sam_header.c b/sam_header.c
index 05d75de..88b6a1c 100644
--- a/sam_header.c
+++ b/sam_header.c
@@ -38,7 +38,7 @@ const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL};
 const char *r_sq_tags[] = {"SN","LN",NULL};
 const char *u_sq_tags[] = {"SN",NULL};
 
-const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL};
+const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL};
 const char *r_rg_tags[] = {"ID",NULL};
 const char *u_rg_tags[] = {"ID",NULL};
 
@@ -366,6 +366,7 @@ static HeaderLine *sam_header_line_parse(const char *headerLine)
         while (*to && *to=='\t') to++;
         if ( to-from != 1 ) {
             debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
+            free(hline);
             return 0;
         }
         from = to;
@@ -434,8 +435,14 @@ static int sam_header_line_validate(HeaderLine *hline)
         tag = tags->data;
         if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) )
         {
-            debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]);
-            return 0;
+            // Lower case tags are user-defined values.
+            if( !(islower((unsigned char)tag->key[0]) || islower((unsigned char)tag->key[1])) )
+            {
+                // Neither is lower case, but tag was not recognized.
+                debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]);
+                // return 0; // Even unknown tags are allowed - for forward compatibility with new attributes
+            }
+            // else - allow user defined tag
         }
         tags = tags->next;
     }
@@ -563,6 +570,7 @@ void *sam_header_parse2(const char *headerText)
     const char *text;
     char *buf=NULL;
     size_t nbuf = 0;
+    int tovalidate = 0;
 
     if ( !headerText )
         return 0;
@@ -571,7 +579,7 @@ void *sam_header_parse2(const char *headerText)
     while ( (text=nextline(&buf, &nbuf, text)) )
     {
         hline = sam_header_line_parse(buf);
-        if ( hline && sam_header_line_validate(hline) )
+        if ( hline && (!tovalidate || sam_header_line_validate(hline)) )
             // With too many (~250,000) reference sequences the header parsing was too slow with list_append.
             hlines = list_append_to_end(hlines, hline);
         else
@@ -662,6 +670,50 @@ char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n
     return ret;
 }
 
+/*
+ * Step through the header lines of one record type, yielding a key/value pair per call.
+ *
+ * iter:       list node to resume the scan from; NULL yields NULL immediately.
+ * type:       two-character record type a line must carry to be considered.
+ * key_tag:    tag whose value is stored in *_key.
+ * value_tag:  tag whose value is stored in *_value.
+ *
+ * On a match both outputs point at the tag values stored in the list (not copies)
+ * and the node after the matched line is returned, which is NULL when the
+ * matched line was the last one. NULL is also returned, with the outputs left
+ * untouched, when no remaining line matches. Lines of the right type that lack
+ * either tag are skipped, so neither output is ever read through a NULL tag.
+ */
+void *sam_header2key_val(void *iter, const char type[2], const char key_tag[2], const char value_tag[2], const char **_key, const char **_value)
+{
+    list_t *l = iter;
+    if ( !l ) return NULL;
+
+    while (l)
+    {
+        HeaderLine *hline = l->data;
+        if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] )
+        {
+            l = l->next;
+            continue;
+        }
+
+        HeaderTag *key, *value;
+        key = header_line_has_tag(hline,key_tag);
+        value = header_line_has_tag(hline,value_tag);
+        if ( !key || !value )
+        {
+            l = l->next;
+            continue;
+        }
+
+        *_key = key->value;
+        *_value = value->value;
+        return l->next;
+    }
+    return l;
+}
+
 const char *sam_tbl_get(void *h, const char *key)
 {
     khash_t(str) *tbl = (khash_t(str)*)h;
@@ -732,4 +784,53 @@ void *sam_header_merge(int n, const void **_dicts)
 
     return out_dict;
 }
+/*
+ * Gather the values of several tags from every header line of one record type.
+ *
+ * dict:  head of the header line list; NULL yields NULL with *n set to 0.
+ * type:  two-character record type a line must carry to be included.
+ * tags:  NULL-terminated array of two-character tag names.
+ * n:     receives the number of lines that matched.
+ *
+ * Returns a realloc'ed flat table of *n rows by ntags columns, where ntags is
+ * the length of tags; cell [row*ntags + i] points at the stored value of
+ * tags[i] on that line (not a copy), or is NULL when the line lacks the tag.
+ */
+char **sam_header2tbl_n(const void *dict, const char type[2], const char *tags[], int *n)
+{
+    int nout = 0;
+    char **out = NULL;
+
+    *n = 0;
+    list_t *l = (list_t *)dict;
+    if ( !l ) return NULL;
+
+    int i, ntags = 0;
+    while ( tags[ntags] ) ntags++;
+
+    while (l)
+    {
+        HeaderLine *hline = l->data;
+        if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] )
+        {
+            l = l->next;
+            continue;
+        }
+        out = (char**) realloc(out, sizeof(char*)*(nout+1)*ntags);
+        for (i=0; i<ntags; i++)
+        {
+            HeaderTag *key = header_line_has_tag(hline, tags[i]);
+            if ( !key )
+            {
+                out[nout*ntags+i] = NULL;
+                continue;
+            }
+            out[nout*ntags+i] = key->value;
+        }
+        nout++;
+        l = l->next;
+    }
+    *n = nout;
+    return out;
+}
 