X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sam_header.c;h=d348d10e89b0aa26a1da127b83a1d86091f02763;hb=ccb7838cb53bf0ca6917a77f7991d940057c12db;hp=3879f3fc2a80fbd65ffd1dcb8c6858b280707378;hpb=2bcc5f0d7f87c2c46878afe361112ae8e14c53f1;p=samtools.git diff --git a/sam_header.c b/sam_header.c index 3879f3f..d348d10 100644 --- a/sam_header.c +++ b/sam_header.c @@ -10,6 +10,7 @@ KHASH_MAP_INIT_STR(str, const char *) struct _HeaderList { + struct _HeaderList *last; // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only. struct _HeaderList *next; void *data; }; @@ -37,7 +38,7 @@ const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL}; const char *r_sq_tags[] = {"SN","LN",NULL}; const char *u_sq_tags[] = {"SN",NULL}; -const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL}; +const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL}; const char *r_rg_tags[] = {"ID",NULL}; const char *u_rg_tags[] = {"ID",NULL}; @@ -58,6 +59,34 @@ static void debug(const char *format, ...) va_end(ap); } +#if 0 +// Replaced by list_append_to_end +static list_t *list_prepend(list_t *root, void *data) +{ + list_t *l = malloc(sizeof(list_t)); + l->next = root; + l->data = data; + return l; +} +#endif + +// Relies on the root->last being correct. Do not use with the other list_* +// routines unless they are fixed to modify root->last as well. +static list_t *list_append_to_end(list_t *root, void *data) +{ + list_t *l = malloc(sizeof(list_t)); + l->last = l; + l->next = NULL; + l->data = data; + + if ( !root ) + return l; + + root->last->next = l; + root->last = l; + return root; +} + static list_t *list_append(list_t *root, void *data) { list_t *l = root; @@ -322,7 +351,7 @@ static HeaderLine *sam_header_line_parse(const char *headerLine) while (*to && *to!='\t') to++; if ( to-from != 2 ) { - debug("[sam_header_line_parse] expected '@XY', got [%s]\n", headerLine); + debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine); return 0; } @@ -405,8 +434,14 @@ static int sam_header_line_validate(HeaderLine *hline) tag = tags->data; if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) ) { - debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); - return 0; + // Lower case tags are user-defined values. + if( !(islower(tag->key[0]) || islower(tag->key[1])) ) + { + // Neither is lower case, but tag was not recognized. + debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); + // return 0; // Even unknown tags are allowed - for forward compatibility with new attributes + } + // else - allow user defined tag } tags = tags->next; } @@ -534,6 +569,7 @@ void *sam_header_parse2(const char *headerText) const char *text; char *buf=NULL; size_t nbuf = 0; + int tovalidate = 0; if ( !headerText ) return 0; @@ -542,8 +578,9 @@ void *sam_header_parse2(const char *headerText) while ( (text=nextline(&buf, &nbuf, text)) ) { hline = sam_header_line_parse(buf); - if ( hline && sam_header_line_validate(hline) ) - hlines = list_append(hlines, hline); + if ( hline && (!tovalidate || sam_header_line_validate(hline)) ) + // With too many (~250,000) reference sequences the header parsing was too slow with list_append. + hlines = list_append_to_end(hlines, hline); else { if (hline) sam_header_line_free(hline);