]> git.donarmstrong.com Git - mothur.git/blob - refchimeratest.cpp
Revert to previous commit
[mothur.git] / refchimeratest.cpp
1 /*
2  *  refchimeratest.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 1/31/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "refchimeratest.h"
11 #include "mothur.h"
12
13 int MAXINT = numeric_limits<int>::max();
14
15 //***************************************************************************************************************
16
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs){
18
19         m = MothurOut::getInstance();
20
21         numRefSeqs = refs.size();
22
23         referenceSeqs.resize(numRefSeqs);
24         referenceNames.resize(numRefSeqs);
25         for(int i=0;i<numRefSeqs;i++){
26                 referenceSeqs[i] = refs[i].getAligned();
27                 referenceNames[i] = refs[i].getName();
28         }
29         
30         alignLength = referenceSeqs[0].length();
31
32
33 }
34 //***************************************************************************************************************
35
36 int RefChimeraTest::printHeader(ofstream& chimeraReportFile){
37         try {
38                 chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
39                 return 0; 
40         }catch(exception& e) {
41                 m->errorOut(e, "RefChimeraTest", "execute");
42                 exit(1);
43         }
44 }
45 //***************************************************************************************************************
46
47 int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& chimeraReportFile){
48         
49         vector<vector<int> > left(numRefSeqs);
50         vector<int> singleLeft, bestLeft;
51         vector<int> singleRight, bestRight;
52         
53         vector<vector<int> > right(numRefSeqs);
54         for(int i=0;i<numRefSeqs;i++){
55                 left[i].assign(alignLength, 0);
56         }
57         right = left;
58         
59         int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
60         
61         int leftParentBi, rightParentBi, breakPointBi;
62         int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
63         
64 //      int minMismatchToTrimera = MAXINT;
65 //      int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
66         
67         int nMera = 0;
68         string chimeraRefSeq = "";
69         
70         if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
71         
72                 nMera = 2;
73                 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
74                 
75 //              minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight);
76 //
77 //              if(minMismatchToChimera - minMismatchToTrimera <= 3){
78 //                      nMera = 2;
79 //                      chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
80 //              }
81 //              else{                   
82 //                      nMera = 3;
83 //                      chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB);
84 //              }
85                 
86         }
87         else{
88                 nMera = 1;
89                 chimeraRefSeq = referenceSeqs[bestMatch];
90         }
91         
92         
93         double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
94         
95 //      double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);         
96         
97         chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
98         chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
99         chimeraReportFile << minMismatchToChimera << '\t';
100         
101 //      if(nMera == 1){
102 //              chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA";
103 //      }
104 //      else{
105 //              chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera;       
106 //      }
107         
108         chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
109                 
110         return nMera;
111 }
112
113 /**************************************************************************************************/
114
115 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
116         
117         int bestSequenceMismatch = MAXINT;
118         
119         for(int i=0;i<numRefSeqs;i++){
120                 
121                 int lDiffs = 0;
122                 
123                 for(int l=0;l<alignLength;l++){
124 //                      if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
125                                                 
126                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
127                                 lDiffs++;
128                         }
129                         left[i][l] = lDiffs;
130                 }
131                 
132                 int rDiffs = 0;
133                 int index = 0;
134                 for(int l=alignLength-1;l>=0;l--){
135 //                      if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
136                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
137                                 rDiffs++;
138                         }                       
139                         right[i][index++] = rDiffs;
140                 }
141                 
142                 if(lDiffs < bestSequenceMismatch){
143                         bestSequenceMismatch = lDiffs;
144                         bestRefSeq = i;
145                 }
146         }
147         return bestSequenceMismatch;
148 }
149
150 /**************************************************************************************************/
151
152 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
153         
154         singleLeft.resize(alignLength, MAXINT);
155         bestLeft.resize(alignLength, -1);
156         
157         for(int l=0;l<alignLength;l++){
158                 for(int i=0;i<numRefSeqs;i++){
159                         if(left[i][l] <= singleLeft[l]){
160                                 singleLeft[l] = left[i][l];
161                                 bestLeft[l] = i;
162                         }
163                 }
164         }
165         
166         singleRight.resize(alignLength, MAXINT);
167         bestRight.resize(alignLength, -1);
168         
169         for(int l=0;l<alignLength;l++){
170                 for(int i=0;i<numRefSeqs;i++){
171                         if(right[i][l] <= singleRight[l]){
172                                 singleRight[l] = right[i][l];
173                                 bestRight[l] = i;
174                         }
175                 }
176         }
177         
178         int bestChimeraMismatches = MAXINT;
179         leftParent = -1;
180         rightParent = -1;
181         breakPoint = -1;
182         
183         for(int l=0;l<alignLength;l++){
184                 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
185                 if(chimera < bestChimeraMismatches){
186                         bestChimeraMismatches = chimera;
187                         breakPoint = l;
188                         leftParent = bestLeft[l];
189                         rightParent = bestRight[alignLength - l - 1];
190                 }
191         }
192         
193         return bestChimeraMismatches;
194 }
195
196 /**************************************************************************************************/
197
198 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
199         
200         int bestTrimeraMismatches = MAXINT;
201         
202         leftParent = -1;
203         middleParent = -1;
204         rightParent = -1;
205         
206         breakPointA = -1;
207         breakPointB = -1;
208         
209         vector<vector<int> > minDelta(alignLength);
210         vector<vector<int> > minDeltaSeq(alignLength);
211         
212         for(int i=0;i<alignLength;i++){
213                 minDelta[i].assign(alignLength, MAXINT);
214                 minDeltaSeq[i].assign(alignLength, -1);
215         }
216         
217         for(int x=0;x<alignLength;x++){
218                 for(int y=x;y<alignLength-1;y++){
219                         
220                         for(int i=0;i<numRefSeqs;i++){
221                                 int delta = left[i][y] - left[i][x];
222                                 
223                                 if(delta <= minDelta[x][y]){
224                                         minDelta[x][y] = delta;
225                                         minDeltaSeq[x][y] = i;                                  
226                                 }                               
227                         }
228                         minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
229                         
230                         if(minDelta[x][y] < bestTrimeraMismatches){
231                                 bestTrimeraMismatches = minDelta[x][y];
232                                 
233                                 breakPointA = x;
234                                 breakPointB = y;
235                                 
236                                 leftParent = bestLeft[x];
237                                 middleParent = minDeltaSeq[x][y];
238                                 rightParent = bestRight[alignLength - y - 2];                           
239                         }
240                 }               
241         }
242         return bestTrimeraMismatches;
243 }
244
245 /**************************************************************************************************/
246
247 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
248         
249         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
250         return chimeraRefSeq;
251         
252 }
253
254 /**************************************************************************************************/
255
256 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
257         
258         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
259         
260         return chimeraRefSeq;
261 }
262
263 /**************************************************************************************************/
264
265 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
266         
267         int match = 0;
268         int mismatch = 0;
269         
270         for(int i=0;i<alignLength;i++){
271 //              if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
272                 if(chimeraRefSeq[i] != '.' && querySeq[i] != '.'){
273                         if(querySeq[i] == '-' && chimeraRefSeq[i] == '-' && chimeraRefSeq[i] != 'N'){   /*      do nothing      */      }
274                         else if(querySeq[i] == chimeraRefSeq[i]){
275                                 match++;
276                         }
277                         else{
278                                 mismatch++;
279                         }                       
280                 }
281         }
282         
283         return (double)mismatch / (double)(mismatch + match);   
284 }
285
286 //***************************************************************************************************************
287
288 int RefChimeraTest::getClosestRefIndex(){
289
290         return bestMatch;
291         
292 }
293
294 //***************************************************************************************************************