]> git.donarmstrong.com Git - mothur.git/blob - refchimeratest.cpp
removed trimera checking from refchimeratest
[mothur.git] / refchimeratest.cpp
1 /*
2  *  refchimeratest.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 1/31/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "refchimeratest.h"
11 #include "mothur.h"
12
13 int MAXINT = numeric_limits<int>::max();
14
15 //***************************************************************************************************************
16
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileName){
18
19         m = MothurOut::getInstance();
20
21         m->openOutputFile(chimeraReportFileName, chimeraReportFile);
22         numRefSeqs = refs.size();
23
24         referenceSeqs.resize(numRefSeqs);
25         referenceNames.resize(numRefSeqs);
26         for(int i=0;i<numRefSeqs;i++){
27                 referenceSeqs[i] = refs[i].getAligned();
28                 referenceNames[i] = refs[i].getName();
29         }
30         
31         alignLength = referenceSeqs[0].length();
32
33         chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents";
34 //      chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
35
36 }
37
38 //***************************************************************************************************************
39
40 int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
41         
42         vector<vector<int> > left(numRefSeqs);
43         vector<int> singleLeft, bestLeft;
44         vector<int> singleRight, bestRight;
45         
46         vector<vector<int> > right(numRefSeqs);
47         for(int i=0;i<numRefSeqs;i++){
48                 left[i].assign(alignLength, 0);
49         }
50         right = left;
51         
52         int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
53         
54         int leftParentBi, rightParentBi, breakPointBi;
55         int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
56         
57 //      int minMismatchToTrimera = MAXINT;
58 //      int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
59         
60         int nMera = 0;
61         string chimeraRefSeq = "";
62         
63         if(bestSequenceMismatch - minMismatchToChimera <= 3){
64                 nMera = 1;
65                 chimeraRefSeq = referenceSeqs[bestMatch];
66         }
67         else {
68
69                 nMera = 2;
70                 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
71                 
72 //              minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight);
73 //
74 //              if(minMismatchToChimera - minMismatchToTrimera <= 3){
75 //                      nMera = 2;
76 //                      chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
77 //              }
78 //              else{                   
79 //                      nMera = 3;
80 //                      chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB);
81 //              }
82                 
83         }
84         double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
85         
86 //      double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);         
87         
88         chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
89         chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
90         chimeraReportFile << minMismatchToChimera << '\t';
91         
92 //      if(nMera == 1){
93 //              chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA";
94 //      }
95 //      else{
96 //              chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera;       
97 //      }
98         
99         chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
100                 
101         return nMera;
102 }
103
104 /**************************************************************************************************/
105
106 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
107         
108         int bestSequenceMismatch = MAXINT;
109         
110         for(int i=0;i<numRefSeqs;i++){
111                 
112                 int lDiffs = 0;
113                 for(int l=0;l<alignLength;l++){
114                         if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
115                                 lDiffs++;
116                         }
117                         left[i][l] = lDiffs;
118                 }
119                 
120                 int rDiffs = 0;
121                 int index = 0;
122                 for(int l=alignLength-1;l>=0;l--){
123                         if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
124                                 rDiffs++;
125                         }                       
126                         right[i][index++] = rDiffs;
127                 }
128                 if(lDiffs < bestSequenceMismatch){
129                         bestSequenceMismatch = lDiffs;
130                         bestRefSeq = i;
131                 }
132         }
133         return bestSequenceMismatch;
134 }
135
136 /**************************************************************************************************/
137
138 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
139         
140         singleLeft.resize(alignLength, MAXINT);
141         bestLeft.resize(alignLength, -1);
142         
143         for(int l=0;l<alignLength;l++){
144                 for(int i=0;i<numRefSeqs;i++){
145                         if(left[i][l] <= singleLeft[l]){
146                                 singleLeft[l] = left[i][l];
147                                 bestLeft[l] = i;
148                         }
149                 }
150         }
151         
152         singleRight.resize(alignLength, MAXINT);
153         bestRight.resize(alignLength, -1);
154         
155         for(int l=0;l<alignLength;l++){
156                 for(int i=0;i<numRefSeqs;i++){
157                         if(right[i][l] <= singleRight[l]){
158                                 singleRight[l] = right[i][l];
159                                 bestRight[l] = i;
160                         }
161                 }
162         }
163         
164         int bestChimeraMismatches = MAXINT;
165         leftParent = -1;
166         rightParent = -1;
167         breakPoint = -1;
168         
169         for(int l=0;l<alignLength;l++){
170                 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
171                 if(chimera < bestChimeraMismatches){
172                         bestChimeraMismatches = chimera;
173                         breakPoint = l;
174                         leftParent = bestLeft[l];
175                         rightParent = bestRight[alignLength - l - 1];
176                 }
177         }
178         
179         return bestChimeraMismatches;
180 }
181
182 /**************************************************************************************************/
183
184 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
185         
186         int bestTrimeraMismatches = MAXINT;
187         
188         leftParent = -1;
189         middleParent = -1;
190         rightParent = -1;
191         
192         breakPointA = -1;
193         breakPointB = -1;
194         
195         vector<vector<int> > minDelta(alignLength);
196         vector<vector<int> > minDeltaSeq(alignLength);
197         
198         for(int i=0;i<alignLength;i++){
199                 minDelta[i].assign(alignLength, MAXINT);
200                 minDeltaSeq[i].assign(alignLength, -1);
201         }
202         
203         for(int x=0;x<alignLength;x++){
204                 for(int y=x;y<alignLength-1;y++){
205                         
206                         for(int i=0;i<numRefSeqs;i++){
207                                 int delta = left[i][y] - left[i][x];
208                                 
209                                 if(delta <= minDelta[x][y]){
210                                         minDelta[x][y] = delta;
211                                         minDeltaSeq[x][y] = i;                                  
212                                 }                               
213                         }
214                         minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
215                         
216                         if(minDelta[x][y] < bestTrimeraMismatches){
217                                 bestTrimeraMismatches = minDelta[x][y];
218                                 
219                                 breakPointA = x;
220                                 breakPointB = y;
221                                 
222                                 leftParent = bestLeft[x];
223                                 middleParent = minDeltaSeq[x][y];
224                                 rightParent = bestRight[alignLength - y - 2];                           
225                         }
226                 }               
227         }
228         return bestTrimeraMismatches;
229 }
230
231 /**************************************************************************************************/
232
233 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
234         
235         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
236         return chimeraRefSeq;
237         
238 }
239
240 /**************************************************************************************************/
241
242 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
243         
244         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
245         
246         return chimeraRefSeq;
247 }
248
249 /**************************************************************************************************/
250
251 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
252         
253         int match = 0;
254         int mismatch = 0;
255         
256         for(int i=0;i<alignLength;i++){
257                 if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
258                         if(querySeq[i] == '-' && chimeraRefSeq[i] == '-'){      /*      do nothing      */      }
259                         else if(querySeq[i] == chimeraRefSeq[i]){
260                                 match++;
261                         }
262                         else{
263                                 mismatch++;
264                         }                       
265                 }
266         }
267         
268         return (double)mismatch / (double)(mismatch + match);   
269 }
270
271 //***************************************************************************************************************
272
273 int RefChimeraTest::getClosestRefIndex(){
274
275         return bestMatch;
276         
277 }
278
279 //***************************************************************************************************************