]> git.donarmstrong.com Git - mothur.git/blob - writechhit.cpp
85ec26d7ed31ad1fda9349b0fb38e862e91d3ae5
[mothur.git] / writechhit.cpp
1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.\r
2 \r
3 #include "myutils.h"\r
4 #include "chime.h"\r
5 \r
6 void WriteChimeFileHdr(FILE *f)\r
7         {\r
8         if (f == 0)\r
9                 return;\r
10 \r
11         fprintf(f,\r
12                 "\tQuery"               // 1\r
13                 "\tA"                   // 2\r
14                 "\tB"                   // 3\r
15                 "\tIdQM"                // 4\r
16                 "\tIdQA"                // 5\r
17                 "\tIdQB"                // 6\r
18                 "\tIdAB"                // 7\r
19                 "\tIdQT"                // 8\r
20                 "\tLY"                  // 9\r
21                 "\tLN"                  // 10\r
22                 "\tLA"                  // 11\r
23                 "\tRY"                  // 12\r
24                 "\tRN"                  // 13\r
25                 "\tRA"                  // 14\r
26                 "\tDiv"                 // 15\r
27                 "\tY"                   // 16\r
28                 "\n"\r
29                 );\r
30         }\r
31 \r
32 void WriteChimeHit(FILE *f, const ChimeHit2 &Hit)\r
33         {\r
34         if (f == 0)\r
35                 return;\r
36 \r
37         if (Hit.Div <= 0.0)\r
38                 {\r
39                 fprintf(f, "0.0000");           // 0\r
40 \r
41                 fprintf(f,\r
42                   "\t%s", Hit.QLabel.c_str());  // 1\r
43 \r
44                 fprintf(f,\r
45                   "\t*"                                         // 2\r
46                   "\t*"                                         // 3\r
47                   "\t*"                                         // 4\r
48                   "\t*"                                         // 5\r
49                   "\t*"                                         // 6\r
50                   "\t*"                                         // 7\r
51                   "\t*"                                         // 8\r
52                   "\t*"                                         // 9\r
53                   "\t*"                                         // 10\r
54                   "\t*"                                         // 11\r
55                   "\t*"                                         // 12\r
56                   "\t*"                                         // 13\r
57                   "\t*"                                         // 14\r
58                   "\t*"                                         // 15\r
59                   "\tN"                                         // 16\r
60                   "\n"\r
61                   );\r
62                 return;\r
63                 }\r
64 \r
65         fprintf(f, "%.4f", Hit.Score);          // 0\r
66 \r
67         fputc('\t', f);\r
68         fputs(Hit.QLabel.c_str(), f);           // 1\r
69 \r
70         fputc('\t', f);\r
71         fputs(Hit.ALabel.c_str(), f);           // 2\r
72 \r
73         fputc('\t', f);\r
74         fputs(Hit.BLabel.c_str(), f);           // 3\r
75 \r
76         fprintf(f, "\t%.1f", Hit.PctIdQM);      // 4\r
77         fprintf(f, "\t%.1f", Hit.PctIdQA);      // 5\r
78         fprintf(f, "\t%.1f", Hit.PctIdQB);      // 6\r
79         fprintf(f, "\t%.1f", Hit.PctIdAB);      // 7\r
80         fprintf(f, "\t%.1f", Hit.PctIdQT);      // 8\r
81 \r
82         fprintf(f, "\t%u", Hit.CS_LY);          // 9\r
83         fprintf(f, "\t%u", Hit.CS_LN);          // 10\r
84         fprintf(f, "\t%u", Hit.CS_LA);          // 11\r
85 \r
86         fprintf(f, "\t%u", Hit.CS_RY);          // 12\r
87         fprintf(f, "\t%u", Hit.CS_RN);          // 13\r
88         fprintf(f, "\t%u", Hit.CS_RA);          // 14\r
89 \r
90         fprintf(f, "\t%.2f", Hit.Div);          // 15\r
91 \r
92         fprintf(f, "\t%c", yon(Hit.Accept())); // 16\r
93         fputc('\n', f);\r
94         }\r
95 \r
96 unsigned GetUngappedLength(const byte *Seq, unsigned L)\r
97         {\r
98         unsigned UL = 0;\r
99         for (unsigned i = 0; i < L; ++i)\r
100                 if (!isgap(Seq[i]))\r
101                         ++UL;\r
102         return UL;\r
103         }\r
104 \r
105 void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit)\r
106         {\r
107         if (f == 0)\r
108                 return;\r
109 \r
110         if (Hit.Div <= 0.0)\r
111                 return;\r
112 \r
113         const string &Q3 = Hit.Q3;\r
114         const string &A3 = Hit.A3;\r
115         const string &B3 = Hit.B3;\r
116 \r
117         const byte *Q3Seq = (const byte *) Q3.c_str();\r
118         const byte *A3Seq = (const byte *) A3.c_str();\r
119         const byte *B3Seq = (const byte *) B3.c_str();\r
120 \r
121 // Aligned\r
122         unsigned ColCount = SIZE(Q3);\r
123         asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
124 \r
125         unsigned LQ = GetUngappedLength(Q3Seq, ColCount);\r
126         unsigned LA = GetUngappedLength(A3Seq, ColCount);\r
127         unsigned LB = GetUngappedLength(B3Seq, ColCount);\r
128 \r
129         fprintf(f, "\n");\r
130         fprintf(f, "------------------------------------------------------------------------\n");\r
131         fprintf(f, "Query   (%5u nt) %s\n", LQ, Hit.QLabel.c_str());\r
132         fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str());\r
133         fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str());\r
134 \r
135 // Strip terminal gaps in query\r
136         unsigned FromCol = UINT_MAX;\r
137         unsigned ToCol = UINT_MAX;\r
138         for (unsigned Col = 0; Col < ColCount; ++Col)\r
139                 {\r
140                 if (!isgap(Q3Seq[Col]))\r
141                         {\r
142                         if (FromCol == UINT_MAX)\r
143                                 FromCol = Col;\r
144                         ToCol = Col;\r
145                         }\r
146                 }\r
147 \r
148         unsigned QPos = 0;\r
149         unsigned APos = 0;\r
150         unsigned BPos = 0;\r
151         for (unsigned Col = 0; Col < FromCol; ++Col)\r
152                 {\r
153                 if (!isgap(A3Seq[Col]))\r
154                         ++APos;\r
155                 if (!isgap(B3Seq[Col]))\r
156                         ++BPos;\r
157                 }\r
158 \r
159         unsigned Range = ToCol - FromCol + 1;\r
160         unsigned RowCount = (Range + 79)/80;\r
161         unsigned RowFromCol = FromCol;\r
162         for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex)\r
163                 {\r
164                 fprintf(f, "\n");\r
165                 unsigned RowToCol = RowFromCol + 79;\r
166                 if (RowToCol > ToCol)\r
167                         RowToCol = ToCol;\r
168 \r
169         // A row\r
170                 fprintf(f, "A %5u ", APos + 1);\r
171                 for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
172                         {\r
173                         char q = Q3Seq[Col];\r
174                         char a = A3Seq[Col];\r
175                         if (a != q)\r
176                                 a = tolower(a);\r
177                         fprintf(f, "%c", a);\r
178                         if (!isgap(a))\r
179                                 ++APos;\r
180                         }\r
181                 fprintf(f, " %u\n", APos);\r
182 \r
183         // Q row\r
184                 fprintf(f, "Q %5u ", QPos + 1);\r
185                 for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
186                         {\r
187                         char q = Q3Seq[Col];\r
188                         fprintf(f, "%c", q);\r
189                         if (!isgap(q))\r
190                                 ++QPos;\r
191                         }\r
192                 fprintf(f, " %u\n", QPos);\r
193 \r
194         // B row\r
195                 fprintf(f, "B %5u ", BPos + 1);\r
196                 for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
197                         {\r
198                         char q = Q3Seq[Col];\r
199                         char b = B3Seq[Col];\r
200                         if (b != q)\r
201                                 b = tolower(b);\r
202                         fprintf(f, "%c", b);\r
203                         if (!isgap(b))\r
204                                 ++BPos;\r
205                         }\r
206                 fprintf(f, " %u\n", BPos);\r
207 \r
208         // Diffs\r
209                 fprintf(f, "Diffs   ");\r
210                 for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
211                         {\r
212                         char q = Q3Seq[Col];\r
213                         char a = A3Seq[Col];\r
214                         char b = B3Seq[Col];\r
215 \r
216                         char c = ' ';\r
217                         if (isgap(q) || isgap(a) || isgap(b))\r
218                                 c = ' ';\r
219                         else if (Col < Hit.ColXLo)\r
220                                 {\r
221                                 if (q == a && q == b)\r
222                                         c = ' ';\r
223                                 else if (q == a && q != b)\r
224                                         c = 'A';\r
225                                 else if (q == b && q != a)\r
226                                         c = 'b';\r
227                                 else if (a == b && q != a)\r
228                                         c = 'N';\r
229                                 else\r
230                                         c = '?';\r
231                                 }\r
232                         else if (Col > Hit.ColXHi)\r
233                                 {\r
234                                 if (q == a && q == b)\r
235                                         c = ' ';\r
236                                 else if (q == b && q != a)\r
237                                         c = 'B';\r
238                                 else if (q == a && q != b)\r
239                                         c = 'a';\r
240                                 else if (a == b && q != a)\r
241                                         c = 'N';\r
242                                 else\r
243                                         c = '?';\r
244                                 }\r
245 \r
246                         fprintf(f, "%c", c);\r
247                         }\r
248                 fprintf(f, "\n");\r
249 \r
250         // SNPs\r
251                 fprintf(f, "Votes   ");\r
252                 for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
253                         {\r
254                         char q = Q3Seq[Col];\r
255                         char a = A3Seq[Col];\r
256                         char b = B3Seq[Col];\r
257 \r
258                         bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]));\r
259                         bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]));\r
260 \r
261                         char c = ' ';\r
262                         if (isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap)\r
263                                 c = ' ';\r
264                         else if (Col < Hit.ColXLo)\r
265                                 {\r
266                                 if (q == a && q == b)\r
267                                         c = ' ';\r
268                                 else if (q == a && q != b)\r
269                                         c = '+';\r
270                                 else if (q == b && q != a)\r
271                                         c = '!';\r
272                                 else\r
273                                         c = '0';\r
274                                 }\r
275                         else if (Col > Hit.ColXHi)\r
276                                 {\r
277                                 if (q == a && q == b)\r
278                                         c = ' ';\r
279                                 else if (q == b && q != a)\r
280                                         c = '+';\r
281                                 else if (q == a && q != b)\r
282                                         c = '!';\r
283                                 else\r
284                                         c = '0';\r
285                                 }\r
286 \r
287                         fprintf(f, "%c", c);\r
288                         }\r
289                 fprintf(f, "\n");\r
290 \r
291         // LR row\r
292                 fprintf(f, "Model   ");\r
293                 for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
294                         {\r
295                         if (Col < Hit.ColXLo)\r
296                                 fprintf(f, "A");\r
297                         else if (Col >= Hit.ColXLo && Col <= Hit.ColXHi)\r
298                                 fprintf(f, "x");\r
299                         else\r
300                                 fprintf(f, "B");\r
301                         }\r
302 \r
303                 fprintf(f, "\n");\r
304 \r
305                 RowFromCol += 80;\r
306                 }\r
307         fprintf(f, "\n");\r
308 \r
309         double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB);\r
310         double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP;\r
311 \r
312         unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA;\r
313         unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA;\r
314 \r
315         double PctL = Pct(Hit.CS_LY, LTot);\r
316         double PctR = Pct(Hit.CS_RY, RTot);\r
317 \r
318         fprintf(f,\r
319           "Ids.  QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n",\r
320           Hit.PctIdQA,\r
321           Hit.PctIdQB,\r
322           Hit.PctIdAB,\r
323           Hit.PctIdQM,\r
324           Div);\r
325 \r
326         fprintf(f,\r
327           "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n",\r
328           LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL,\r
329           RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR,\r
330           Hit.Score);\r
331         }\r