]> git.donarmstrong.com Git - samtools.git/blob - sam.c
* samtools-0.1.3-13 (r260)
[samtools.git] / sam.c
1 #include <string.h>
2 #include "sam.h"
3
4 #define TYPE_BAM  1
5 #define TYPE_READ 2
6
7 bam_header_t *bam_header_dup(const bam_header_t *h0)
8 {
9         bam_header_t *h;
10         int i;
11         h = bam_header_init();
12         *h = *h0;
13         h->hash = 0;
14         h->text = (char*)calloc(h->l_text + 1, 1);
15         memcpy(h->text, h0->text, h->l_text);
16         h->target_len = (uint32_t*)calloc(h->n_targets, 4);
17         h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
18         for (i = 0; i < h->n_targets; ++i) {
19                 h->target_len[i] = h0->target_len[i];
20                 h->target_name[i] = strdup(h0->target_name[i]);
21         }
22         return h;
23 }
24
25 bam_header_t *bam_header_parse(const char *text)
26 {
27         bam_header_t *h;
28         int i;
29         char *s, *p, *q, *r;
30
31         i = strlen(text);
32         if (i < 3) return 0; // headerless
33         h = bam_header_init();
34         h->l_text = i;
35         h->text = strdup(text);
36         s = h->text;
37         while ((s = strstr(s, "@SQ")) != 0) {
38                 ++h->n_targets;
39                 s += 3;
40         }
41         h->target_len = (uint32_t*)calloc(h->n_targets, 4);
42         h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
43         i = 0;
44         s = h->text;
45         while ((s = strstr(s, "@SQ")) != 0) {
46                 s += 3;
47                 r = s;
48                 if ((p = strstr(s, "SN:")) != 0) {
49                         q = p + 3;
50                         for (p = q; *p && *p != '\t'; ++p);
51                         h->target_name[i] = (char*)calloc(p - q + 1, 1);
52                         strncpy(h->target_name[i], q, p - q);
53                 } else goto header_err_ret;
54                 if (r < p) r = p;
55                 if ((p = strstr(s, "LN:")) != 0) h->target_len[i] = strtol(p + 3, 0, 10);
56                 else goto header_err_ret;
57                 if (r < p) r = p;
58                 s = r + 3;
59                 ++i;
60         }
61         if (h->n_targets == 0) {
62                 bam_header_destroy(h);
63                 return 0;
64         } else return h;
65
66 header_err_ret:
67         fprintf(stderr, "[bam_header_parse] missing SN tag in a @SQ line.\n");
68         bam_header_destroy(h);
69         return 0;
70 }
71
72 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
73 {
74         samfile_t *fp;
75         fp = (samfile_t*)calloc(1, sizeof(samfile_t));
76         if (mode[0] == 'r') {
77                 const char *fn_list = (const char*)aux;
78                 fp->type |= TYPE_READ;
79                 if (mode[1] == 'b') { // binary
80                         fp->type |= TYPE_BAM;
81                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
82                         if (fp->x.bam == 0) goto open_err_ret;
83                         fp->header = bam_header_read(fp->x.bam);
84                 } else {
85                         fp->x.tamr = sam_open(fn);
86                         if (fp->x.tamr == 0) goto open_err_ret;
87                         fp->header = sam_header_read2(fn_list);
88                 }
89         } else if (mode[0] == 'w') {
90                 fp->header = bam_header_dup((const bam_header_t*)aux);
91                 if (mode[1] == 'b') { // binary
92                         fp->type |= TYPE_BAM;
93                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "w") : bam_dopen(fileno(stdout), "w");
94                         if (fp->x.bam == 0) goto open_err_ret;
95                         bam_header_write(fp->x.bam, fp->header);
96                 } else {
97                         int i;
98                         bam_header_t *alt = 0;
99                         alt = bam_header_parse(fp->header->text);
100                         fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
101                         if (fp->x.tamr == 0) goto open_err_ret;
102                         if (alt) {
103                                 if (alt->n_targets != fp->header->n_targets)
104                                         fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
105                                 bam_header_destroy(alt);
106                                 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
107                         }
108                         if (strstr(mode, "h")) // print header
109                                 for (i = 0; i < fp->header->n_targets; ++i)
110                                         fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
111                 }
112         }
113         return fp;
114
115 open_err_ret:
116         free(fp);
117         return 0;
118 }
119
120 void samclose(samfile_t *fp)
121 {
122         if (fp == 0) return;
123         if (fp->header) bam_header_destroy(fp->header);
124         if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
125         else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
126         else fclose(fp->x.tamw);
127         free(fp);
128 }
129
130 int samread(samfile_t *fp, bam1_t *b)
131 {
132         if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
133         if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
134         else return sam_read1(fp->x.tamr, fp->header, b);
135 }
136
137 int samwrite(samfile_t *fp, const bam1_t *b)
138 {
139         if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
140         if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
141         else {
142                 char *s = bam_format1(fp->header, b);
143                 int l = strlen(s);
144                 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
145                 free(s);
146                 return l + 1;
147         }
148 }
149
150 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
151 {
152         bam_plbuf_t *buf;
153         int ret;
154         bam1_t *b;
155         b = bam_init1();
156         buf = bam_plbuf_init(func, func_data);
157         bam_plbuf_set_mask(buf, mask);
158         while ((ret = samread(fp, b)) >= 0)
159                 bam_plbuf_push(b, buf);
160         bam_plbuf_push(0, buf);
161         bam_plbuf_destroy(buf);
162         bam_destroy1(b);
163         return 0;
164 }