7 KSTREAM_INIT(gzFile, gzread, 16384)
11 #define write_cigar(_c, _n, _m, _v) do { \
14 _c = realloc(_c, _m * sizeof(unsigned)); \
19 int main(int argc, char *argv[])
23 kstring_t s, t[N_TMPSTR];
24 int dret, i, af_n, af_max, af_i;
25 long n_ctgs = 0, n_reads = 0, m_cigar = 0, n_cigar = 0;
26 unsigned *af, *cigar = 0;
29 fprintf(stderr, "\nUsage: ace2sam <in.ace>\n\n");
30 fprintf(stderr, " ace2sam in.ace > a2s.out 2> a2s.err\n");
31 fprintf(stderr, " (grep ^H a2s.err | sed s,^..,,; cat a2s.out) > a2s.sam\n");
32 fprintf(stderr, " grep ^S a2s.err | sed s,^..,, > a2s.fa\n\n");
35 s.l = s.m = 0; s.s = 0;
36 af_n = af_max = af_i = 0; af = 0;
37 for (i = 0; i < N_TMPSTR; ++i) t[i].l = t[i].m = 0, t[i].s = 0;
38 fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
40 while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
41 if (strcmp(s.s, "AS") == 0) {
42 ks_getuntil(ks, 0, &s, &dret); n_ctgs = atol(s.s);
43 ks_getuntil(ks, 0, &s, &dret); n_reads = atol(s.s);
44 if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
45 } else if (strcmp(s.s, "CO") == 0) {
46 t[0].l = t[1].l = t[2].l = t[3].l = t[4].l = 0; // 0: name; 1: padded; 2: unpadded; 3: CIGAR/flat-cigar; 4: SAM line
48 ks_getuntil(ks, 0, &s, &dret); kputs(s.s, &t[0]);
49 ks_getuntil(ks, 0, &s, &dret);
50 ks_getuntil(ks, 0, &s, &dret); n_reads = atoi(s.s);
51 ks_getuntil(ks, '\n', &s, &dret);
52 fprintf(stderr, "S >%s\n", t[0].s);
53 while (ks_getuntil(ks, '\n', &s, &dret) >= 0 && s.l > 0) {
55 fputs("S ", stderr); fputs(s.s, stderr); fputc('\n', stderr);
58 #define __padded2cigar(sp, su) do { \
59 int i, l_M = 0, l_D = 0; \
60 kputsn(sp.s, sp.l, &su); su.l = 0; \
61 for (i = 0; i < sp.l; ++i) { \
62 if (sp.s[i] == '*') { \
63 if (l_M) write_cigar(cigar, n_cigar, m_cigar, l_M<<4); \
66 if (l_D) write_cigar(cigar, n_cigar, m_cigar, l_D<<4 | 2); \
68 su.s[su.l++] = sp.s[i]; \
72 if (l_M) write_cigar(cigar, n_cigar, m_cigar, l_M<<4); \
73 else write_cigar(cigar, n_cigar, m_cigar, l_D<<4 | 2); \
77 __padded2cigar(t[1], t[2]);
78 for (i = 0; i < n_cigar; ++i) {
79 kputw(cigar[i]>>4, &t[3]); kputc("MIDNSHP=X"[cigar[i]&0xf], &t[3]);
81 kputsn(t[0].s, t[0].l, &t[4]); kputs("\t516\t", &t[4]); kputsn(t[0].s, t[0].l, &t[4]); kputs("\t1\t60\t", &t[4]);
82 kputsn(t[3].s, t[3].l, &t[4]); kputs("\t*\t0\t0\t", &t[4]); kputsn(t[2].s, t[2].l, &t[4]); kputs("\t*", &t[4]);
83 fprintf(stderr, "H @SQ\tSN:%s\tLN:%ld\n", t[0].s, t[1].l);
84 } else if (strcmp(s.s, "BQ") == 0) {
85 if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
87 for (i = 0; i < t[2].l; ++i) {
89 if (ks_getuntil(ks, 0, &s, &dret) < 0) fprintf(stderr, "E truncated contig quality\n");
94 if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
95 ks_getuntil(ks, '\n', &s, &dret);
96 puts(t[4].s); t[4].l = 0;
97 } else if (strcmp(s.s, "AF") == 0) { // read coordinate
98 int reversed, neg, pos;
99 if (t[4].l) puts(t[4].s);
101 ks_getuntil(ks, 0, &s, &dret);
102 ks_getuntil(ks, 0, &s, &dret); reversed = s.s[0] == 'C'? 1 : 0;
103 ks_getuntil(ks, 0, &s, &dret); pos = atoi(s.s); neg = pos < 0? 1 : 0; pos = pos < 0? -pos : pos;
104 if (af_n == af_max) {
105 af_max = af_max? af_max<<1 : 4;
106 af = realloc(af, af_max * sizeof(unsigned));
108 af[af_n++] = pos << 2 | neg << 1 | reversed;
109 } else if (strcmp(s.s, "RD") == 0) { // read sequence
110 t[2].l = t[3].l = t[4].l = 0;
111 ks_getuntil(ks, 0, &t[4], &dret); kputc('\t', &t[4]); // read name
112 kputw((af[af_i]&1)? 16 : 0, &t[4]); kputc('\t', &t[4]);
113 kputsn(t[0].s, t[0].l, &t[4]); kputc('\t', &t[4]); // the SAM line stops at RNAME
114 if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
115 while (ks_getuntil(ks, '\n', &s, &dret) >= 0 && s.l > 0) kputs(s.s, &t[2]);
116 } else if (strcmp(s.s, "QA") == 0) { // clipping
118 ks_getuntil(ks, 0, &s, &dret); ks_getuntil(ks, 0, &s, &dret); // skip quality clipping
119 ks_getuntil(ks, 0, &s, &dret); beg = atoi(s.s) - 1; // align clipping start
120 ks_getuntil(ks, 0, &s, &dret); end = atoi(s.s);
122 if (af[af_i]>>1&1) pos = -pos;
124 kputw(pos, &t[4]); kputs("\t60\t", &t[4]);
125 n_cigar = 0; // start to generate CIGAR
126 if (beg) write_cigar(cigar, n_cigar, m_cigar, beg<<4|4);
127 __padded2cigar(t[2], t[3]);
128 if (end < t[2].l) write_cigar(cigar, n_cigar, m_cigar, (t[2].l-end)<<4|4);
129 if (n_cigar > 1 && (cigar[0]&0xf) == 4) cigar[1] -= beg<<4;
130 if (n_cigar > 1 && (cigar[n_cigar-1]&0xf) == 4) cigar[n_cigar-2] -= cigar[n_cigar-1]>>4<<4;
131 for (i = 0; i < n_cigar; ++i) {
132 kputw(cigar[i]>>4, &t[4]); kputc("MIDNSHP=X"[cigar[i]&0xf], &t[4]);
134 kputs("\t*\t0\t0\t", &t[4]);
135 kputsn(t[3].s, t[3].l, &t[4]); kputs("\t*", &t[4]);
138 } else if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
142 free(af); free(s.s); free(cigar);
143 for (i = 0; i < N_TMPSTR; ++i) free(t[i].s);