]> git.donarmstrong.com Git - samtools.git/blob - sam.c
* samtools-0.1.3-12 (r259)
[samtools.git] / sam.c
1 #include <string.h>
2 #include "sam.h"
3
4 #define TYPE_BAM  1
5 #define TYPE_READ 2
6
7 bam_header_t *bam_header_dup(const bam_header_t *h0)
8 {
9         bam_header_t *h;
10         int i;
11         h = bam_header_init();
12         *h = *h0;
13         h->hash = 0;
14         h->text = (char*)calloc(h->l_text + 1, 1);
15         memcpy(h->text, h0->text, h->l_text);
16         h->target_len = (uint32_t*)calloc(h->n_targets, 4);
17         h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
18         for (i = 0; i < h->n_targets; ++i) {
19                 h->target_len[i] = h0->target_len[i];
20                 h->target_name[i] = strdup(h0->target_name[i]);
21         }
22         return h;
23 }
24
25 bam_header_t *bam_header_parse(const char *text)
26 {
27         bam_header_t *h;
28         int i;
29         char *s, *p, *q, *r;
30
31         i = strlen(text);
32         if (i < 3) return 0; // headerless
33         h = bam_header_init();
34         h->l_text = i;
35         h->text = strdup(text);
36         s = h->text;
37         while ((s = strstr(s, "@SQ")) != 0) {
38                 ++h->n_targets;
39                 s += 3;
40         }
41         h->target_len = (uint32_t*)calloc(h->n_targets, 4);
42         h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
43         i = 0;
44         s = h->text;
45         while ((s = strstr(s, "@SQ")) != 0) {
46                 s += 3;
47                 r = s;
48                 if ((p = strstr(s, "SN:")) != 0) {
49                         q = p + 3;
50                         for (p = q; *p && *p != '\t'; ++p);
51                         h->target_name[i] = (char*)calloc(p - q + 1, 1);
52                         strncpy(h->target_name[i], q, p - q);
53                 } else goto header_err_ret;
54                 if (r < p) r = p;
55                 if ((p = strstr(s, "LN:")) != 0) h->target_len[i] = strtol(p + 3, 0, 10);
56                 else goto header_err_ret;
57                 if (r < p) r = p;
58                 s = r + 3;
59                 ++i;
60         }
61         if (h->n_targets == 0) {
62                 bam_header_destroy(h);
63                 return 0;
64         } else return h;
65
66 header_err_ret:
67         fprintf(stderr, "[bam_header_parse] missing SN tag in a @SQ line.\n");
68         bam_header_destroy(h);
69         return 0;
70 }
71
72 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
73 {
74         samfile_t *fp;
75         fp = (samfile_t*)calloc(1, sizeof(samfile_t));
76         if (mode[0] == 'r') {
77                 const char *fn_list = (const char*)aux;
78                 fp->type |= TYPE_READ;
79                 if (mode[1] == 'b') { // binary
80                         fp->type |= TYPE_BAM;
81                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
82                         fp->header = bam_header_read(fp->x.bam);
83                 } else {
84                         fp->x.tamr = sam_open(fn);
85                         fp->header = sam_header_read2(fn_list);
86                 }
87         } else if (mode[0] == 'w') {
88                 fp->header = bam_header_dup((const bam_header_t*)aux);
89                 if (mode[1] == 'b') { // binary
90                         fp->type |= TYPE_BAM;
91                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "w") : bam_dopen(fileno(stdout), "w");
92                         bam_header_write(fp->x.bam, fp->header);
93                 } else {
94                         int i;
95                         bam_header_t *alt = 0;
96                         alt = bam_header_parse(fp->header->text);
97                         fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
98                         if (alt) {
99                                 if (alt->n_targets != fp->header->n_targets)
100                                         fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
101                                 bam_header_destroy(alt);
102                                 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
103                         }
104                         if (strstr(mode, "h")) // print header
105                                 for (i = 0; i < fp->header->n_targets; ++i)
106                                         fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
107                 }
108         }
109         return fp;
110 }
111
112 void samclose(samfile_t *fp)
113 {
114         if (fp == 0) return;
115         if (fp->header) bam_header_destroy(fp->header);
116         if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
117         else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
118         else fclose(fp->x.tamw);
119         free(fp);
120 }
121
122 int samread(samfile_t *fp, bam1_t *b)
123 {
124         if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
125         if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
126         else return sam_read1(fp->x.tamr, fp->header, b);
127 }
128
129 int samwrite(samfile_t *fp, const bam1_t *b)
130 {
131         if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
132         if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
133         else {
134                 char *s = bam_format1(fp->header, b);
135                 int l = strlen(s);
136                 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
137                 free(s);
138                 return l + 1;
139         }
140 }
141
142 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
143 {
144         bam_plbuf_t *buf;
145         int ret;
146         bam1_t *b;
147         b = bam_init1();
148         buf = bam_plbuf_init(func, func_data);
149         bam_plbuf_set_mask(buf, mask);
150         while ((ret = samread(fp, b)) >= 0)
151                 bam_plbuf_push(b, buf);
152         bam_plbuf_push(0, buf);
153         bam_plbuf_destroy(buf);
154         bam_destroy1(b);
155         return 0;
156 }