X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sam_header.c;h=d348d10e89b0aa26a1da127b83a1d86091f02763;hb=ccb7838cb53bf0ca6917a77f7991d940057c12db;hp=036ea88d28c7b2b82bd05606bbd79120887e86f3;hpb=afda9d0ed6428b3e54d6acf010e77f5f49184fae;p=samtools.git diff --git a/sam_header.c b/sam_header.c index 036ea88..d348d10 100644 --- a/sam_header.c +++ b/sam_header.c @@ -38,7 +38,7 @@ const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL}; const char *r_sq_tags[] = {"SN","LN",NULL}; const char *u_sq_tags[] = {"SN",NULL}; -const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL}; +const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL}; const char *r_rg_tags[] = {"ID",NULL}; const char *u_rg_tags[] = {"ID",NULL}; @@ -351,7 +351,7 @@ static HeaderLine *sam_header_line_parse(const char *headerLine) while (*to && *to!='\t') to++; if ( to-from != 2 ) { - debug("[sam_header_line_parse] expected '@XY', got [%s]\n", headerLine); + debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine); return 0; } @@ -434,8 +434,14 @@ static int sam_header_line_validate(HeaderLine *hline) tag = tags->data; if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) ) { - debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); - return 0; + // Lower case tags are user-defined values. + if( !(islower(tag->key[0]) || islower(tag->key[1])) ) + { + // Neither is lower case, but tag was not recognized. + debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); + // return 0; // Even unknown tags are allowed - for forward compatibility with new attributes + } + // else - allow user defined tag } tags = tags->next; } @@ -563,6 +569,7 @@ void *sam_header_parse2(const char *headerText) const char *text; char *buf=NULL; size_t nbuf = 0; + int tovalidate = 0; if ( !headerText ) return 0; @@ -571,7 +578,7 @@ void *sam_header_parse2(const char *headerText) while ( (text=nextline(&buf, &nbuf, text)) ) { hline = sam_header_line_parse(buf); - if ( hline && sam_header_line_validate(hline) ) + if ( hline && (!tovalidate || sam_header_line_validate(hline)) ) // With too many (~250,000) reference sequences the header parsing was too slow with list_append. hlines = list_append_to_end(hlines, hline); else