]> git.donarmstrong.com Git - mothur.git/blob - make3way.cpp
dbadd1ab26269b10ee98229c638582ea8774b30b
[mothur.git] / make3way.cpp
1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.\r
2 \r
3 #include "myutils.h"\r
4 #include "sfasta.h"\r
5 #include "path.h"\r
6 #include "dp.h"\r
7 \r
8 void Make3Way(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
9   const string &PathQA, const string &PathQB,\r
10   string &Q3, string &A3, string &B3)\r
11         {\r
12         Q3.clear();\r
13         A3.clear();\r
14         B3.clear();\r
15 \r
16 #if     DEBUG\r
17         {\r
18         unsigned QLen = 0;\r
19         unsigned ALen = 0;\r
20         for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
21                 {\r
22                 char c = PathQA[i];\r
23                 if (c == 'M' || c == 'D')\r
24                         ++QLen;\r
25                 if (c == 'M' || c == 'I')\r
26                         ++ALen;\r
27                 }\r
28         asserta(QLen == QSD.L);\r
29         asserta(ALen == ASD.L);\r
30         }\r
31         {\r
32         unsigned QLen = 0;\r
33         unsigned BLen = 0;\r
34         for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
35                 {\r
36                 char c = PathQB[i];\r
37                 if (c == 'M' || c == 'D')\r
38                         ++QLen;\r
39                 if (c == 'M' || c == 'I')\r
40                         ++BLen;\r
41                 }\r
42         asserta(QLen == QSD.L);\r
43         asserta(BLen == BSD.L);\r
44         }\r
45 #endif\r
46 \r
47         const byte *Q = QSD.Seq;\r
48         const byte *A = ASD.Seq;\r
49         const byte *B = BSD.Seq;\r
50 \r
51         unsigned LQ = QSD.L;\r
52         unsigned LA = ASD.L;\r
53         unsigned LB = BSD.L;\r
54 \r
55         vector<unsigned> InsertCountsA(LQ+1, 0);\r
56         unsigned QPos = 0;\r
57         for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
58                 {\r
59                 char c = PathQA[i];\r
60                 if (c == 'M' || c == 'D')\r
61                         ++QPos;\r
62                 else\r
63                         {\r
64                         asserta(c == 'I');\r
65                         asserta(QPos <= LQ);\r
66                         ++(InsertCountsA[QPos]);\r
67                         }\r
68                 }\r
69 \r
70         vector<unsigned> InsertCountsB(LQ+1, 0);\r
71         QPos = 0;\r
72         for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
73                 {\r
74                 char c = PathQB[i];\r
75                 if (c == 'M' || c == 'D')\r
76                         ++QPos;\r
77                 else\r
78                         {\r
79                         asserta(c == 'I');\r
80                         asserta(QPos <= LQ);\r
81                         ++(InsertCountsB[QPos]);\r
82                         }\r
83                 }\r
84 \r
85         vector<unsigned> InsertCounts;\r
86         for (unsigned i = 0; i <= LQ; ++i)\r
87                 {\r
88                 unsigned is = max(InsertCountsA[i], InsertCountsB[i]);\r
89                 InsertCounts.push_back(is);\r
90                 }\r
91 \r
92         for (unsigned i = 0; i < LQ; ++i)\r
93                 {\r
94                 for (unsigned k = 0; k < InsertCounts[i]; ++k)\r
95                         Q3.push_back('-');\r
96                 asserta(i < LQ);\r
97                 Q3.push_back(toupper(Q[i]));\r
98                 }\r
99         for (unsigned k = 0; k < InsertCounts[LQ]; ++k)\r
100                 Q3.push_back('-');\r
101 \r
102 // A\r
103         QPos = 0;\r
104         unsigned APos = 0;\r
105         unsigned is = 0;\r
106         for (unsigned i = 0; i < SIZE(PathQA); ++i)\r
107                 {\r
108                 char c = PathQA[i];\r
109                 if (c == 'M' || c == 'D')\r
110                         {\r
111                         unsigned isq = InsertCounts[QPos];\r
112                         asserta(is <= isq);\r
113                         for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i)\r
114                                 A3.push_back('-');\r
115                         is = 0;\r
116                         ++QPos;\r
117                         }\r
118                 if (c == 'M')\r
119                         {\r
120                         asserta(APos < LA);\r
121                         A3.push_back(toupper(A[APos++]));\r
122                         }\r
123                 else if (c == 'D')\r
124                         A3.push_back('-');\r
125                 else if (c == 'I')\r
126                         {\r
127                         ++is;\r
128                         asserta(APos < LA);\r
129                         A3.push_back(toupper(A[APos++]));\r
130                         }\r
131                 }\r
132         asserta(is <= InsertCounts[LQ]);\r
133         for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k)\r
134                 A3.push_back('-');\r
135         asserta(QPos == LQ);\r
136         asserta(APos == LA);\r
137 \r
138 // B\r
139         QPos = 0;\r
140         unsigned BPos = 0;\r
141         is = 0;\r
142         for (unsigned i = 0; i < SIZE(PathQB); ++i)\r
143                 {\r
144                 char c = PathQB[i];\r
145                 if (c == 'M' || c == 'D')\r
146                         {\r
147                         asserta(is <= InsertCounts[QPos]);\r
148                         for (unsigned i = 0; i < InsertCounts[QPos]-is; ++i)\r
149                                 B3.push_back('-');\r
150                         is = 0;\r
151                         ++QPos;\r
152                         }\r
153                 if (c == 'M')\r
154                         {\r
155                         asserta(BPos < LB);\r
156                         B3.push_back(toupper(B[BPos++]));\r
157                         }\r
158                 else if (c == 'D')\r
159                         B3.push_back('-');\r
160                 else if (c == 'I')\r
161                         {\r
162                         ++is;\r
163                         asserta(BPos < LB);\r
164                         B3.push_back(toupper(B[BPos++]));\r
165                         }\r
166                 }\r
167         asserta(is <= InsertCounts[LQ]);\r
168         for (unsigned k = 0; k < InsertCounts[LQ]-is; ++k)\r
169                 B3.push_back('-');\r
170         asserta(APos == LA);\r
171         asserta(BPos == LB);\r
172 \r
173         asserta(SIZE(Q3) == SIZE(A3));\r
174         asserta(SIZE(Q3) == SIZE(B3));\r
175         }\r