From b3a8c553f7114a541812a069e6f105a6853f5a45 Mon Sep 17 00:00:00 2001
From: Petr Danecek <pd3@sanger.ac.uk>
Date: Fri, 26 Feb 2010 15:51:40 +0000
Subject: [PATCH] Improved efficiency of header parsing

---
 AUTHORS      |  4 ++++
 bam.h        |  2 +-
 bam_import.c | 19 +++++++++++++++++--
 sam_header.c | 10 +++++++++-
 4 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 435431c..95afabb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -14,3 +14,7 @@ used in `faidx' for indexing RAZF compressed fasta files.
 
 Colin Hercus updated novo2sam.pl to support gapped alignment by
 novoalign.
+
+Petr Danecek contributed the header parsing library sam_header.c and
+the sam2vcf.pl script, and added knet support to the RAZF library.
+
diff --git a/bam.h b/bam.h
index 291b303..8d6c431 100644
--- a/bam.h
+++ b/bam.h
@@ -87,7 +87,7 @@ typedef struct {
 	char **target_name;
 	uint32_t *target_len;
 	void *dict, *hash, *rg2lib;
-	int l_text;
+	size_t l_text, n_text;
 	char *text;
 } bam_header_t;
 
diff --git a/bam_import.c b/bam_import.c
index 3a4a5cd..9d84328 100644
--- a/bam_import.c
+++ b/bam_import.c
@@ -168,9 +168,29 @@ static inline void parse_error(int64_t n_lines, const char * __restrict msg)
 }
+/*
+ * Append up to str->l + 1 bytes of str->s to header->text, growing the buffer
+ * when needed, and NUL-terminate the result. Aborts if realloc fails or the
+ * capacity sanity check does not hold.
+ */
 static inline void append_text(bam_header_t *header, kstring_t *str)
 {
-	int x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null
+	size_t x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null
 	kroundup32(x); kroundup32(y);
-	if (x < y) header->text = (char*)realloc(header->text, y);
+	if (x < y)
+    {
+        header->n_text = y;
+        header->text = (char*)realloc(header->text, y);
+        if ( !header->text )
+        {
+            fprintf(stderr,"realloc failed to alloc %zu bytes\n", y); // %zu: y is a size_t
+            abort();
+        }
+    }
+    // Sanity check
+    if ( header->l_text+str->l+1 >= header->n_text )
+    {
+        fprintf(stderr,"append_text FIXME: %zu>=%zu, x=%zu,y=%zu\n",  header->l_text+str->l+1,header->n_text,x,y);
+        abort();
+    }
 	strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here.
 	header->l_text += str->l + 1;
 	header->text[header->l_text] = 0;
diff --git a/sam_header.c b/sam_header.c
index 3879f3f..238c5cb 100644
--- a/sam_header.c
+++ b/sam_header.c
@@ -58,6 +58,24 @@ static void debug(const char *format, ...)
     va_end(ap);
 }
 
+/*
+ * Push data onto the front of the list and return the new head.
+ * root is stored unchanged as the new node's successor; no traversal is done.
+ * Aborts if the node cannot be allocated.
+ */
+static list_t *list_prepend(list_t *root, void *data)
+{
+    list_t *l = malloc(sizeof(*l));
+    if ( !l )
+    {
+        debug("list_prepend: out of memory\n");
+        abort();
+    }
+    l->next = root;
+    l->data = data;
+    return l;
+}
+
 static list_t *list_append(list_t *root, void *data)
 {
     list_t *l = root;
@@ -543,7 +551,7 @@ void *sam_header_parse2(const char *headerText)
     {
         hline = sam_header_line_parse(buf);
         if ( hline && sam_header_line_validate(hline) )
-            hlines = list_append(hlines, hline);
+            hlines = list_prepend(hlines, hline);
         else
         {
 			if (hline) sam_header_line_free(hline);
-- 
2.39.5