]> git.donarmstrong.com Git - mothur.git/blob - refchimeratest.cpp
small fix to refchimeratest.cpp
[mothur.git] / refchimeratest.cpp
1 /*
2  *  refchimeratest.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 1/31/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "refchimeratest.h"
11 #include "mothur.h"
12
13 int MAXINT = numeric_limits<int>::max();
14
15 //***************************************************************************************************************
16
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileName){
18
19         m = MothurOut::getInstance();
20
21         m->openOutputFile(chimeraReportFileName, chimeraReportFile);
22         numRefSeqs = refs.size();
23
24         referenceSeqs.resize(numRefSeqs);
25         referenceNames.resize(numRefSeqs);
26         for(int i=0;i<numRefSeqs;i++){
27                 referenceSeqs[i] = refs[i].getAligned();
28                 referenceNames[i] = refs[i].getName();
29         }
30         
31         alignLength = referenceSeqs[0].length();
32
33         chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
34 //      chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
35
36 }
37
38 //***************************************************************************************************************
39
40 int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
41         
42         vector<vector<int> > left(numRefSeqs);
43         vector<int> singleLeft, bestLeft;
44         vector<int> singleRight, bestRight;
45         
46         vector<vector<int> > right(numRefSeqs);
47         for(int i=0;i<numRefSeqs;i++){
48                 left[i].assign(alignLength, 0);
49         }
50         right = left;
51         
52         int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
53         
54         int leftParentBi, rightParentBi, breakPointBi;
55         int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
56         
57 //      int minMismatchToTrimera = MAXINT;
58 //      int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
59         
60         int nMera = 0;
61         string chimeraRefSeq = "";
62         
63         if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
64         
65                 nMera = 2;
66                 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
67                 
68 //              minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight);
69 //
70 //              if(minMismatchToChimera - minMismatchToTrimera <= 3){
71 //                      nMera = 2;
72 //                      chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
73 //              }
74 //              else{                   
75 //                      nMera = 3;
76 //                      chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB);
77 //              }
78                 
79         }
80         else{
81                 nMera = 1;
82                 chimeraRefSeq = referenceSeqs[bestMatch];
83         }
84         
85         
86         double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
87         
88 //      double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);         
89         
90         chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
91         chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
92         chimeraReportFile << minMismatchToChimera << '\t';
93         
94 //      if(nMera == 1){
95 //              chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA";
96 //      }
97 //      else{
98 //              chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera;       
99 //      }
100         
101         chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
102                 
103         return nMera;
104 }
105
106 /**************************************************************************************************/
107
108 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
109         
110         int bestSequenceMismatch = MAXINT;
111         
112         for(int i=0;i<numRefSeqs;i++){
113                 
114                 int lDiffs = 0;
115                 for(int l=0;l<alignLength;l++){
116 //                      if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
117                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l]){
118                                 lDiffs++;
119                         }
120                         left[i][l] = lDiffs;
121                 }
122                 
123                 int rDiffs = 0;
124                 int index = 0;
125                 for(int l=alignLength-1;l>=0;l--){
126 //                      if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
127                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l]){
128                                 rDiffs++;
129                         }                       
130                         right[i][index++] = rDiffs;
131                 }
132                 if(lDiffs < bestSequenceMismatch){
133                         bestSequenceMismatch = lDiffs;
134                         bestRefSeq = i;
135                 }
136         }
137         return bestSequenceMismatch;
138 }
139
140 /**************************************************************************************************/
141
142 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
143         
144         singleLeft.resize(alignLength, MAXINT);
145         bestLeft.resize(alignLength, -1);
146         
147         for(int l=0;l<alignLength;l++){
148                 for(int i=0;i<numRefSeqs;i++){
149                         if(left[i][l] <= singleLeft[l]){
150                                 singleLeft[l] = left[i][l];
151                                 bestLeft[l] = i;
152                         }
153                 }
154         }
155         
156         singleRight.resize(alignLength, MAXINT);
157         bestRight.resize(alignLength, -1);
158         
159         for(int l=0;l<alignLength;l++){
160                 for(int i=0;i<numRefSeqs;i++){
161                         if(right[i][l] <= singleRight[l]){
162                                 singleRight[l] = right[i][l];
163                                 bestRight[l] = i;
164                         }
165                 }
166         }
167         
168         int bestChimeraMismatches = MAXINT;
169         leftParent = -1;
170         rightParent = -1;
171         breakPoint = -1;
172         
173         for(int l=0;l<alignLength;l++){
174                 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
175                 if(chimera < bestChimeraMismatches){
176                         bestChimeraMismatches = chimera;
177                         breakPoint = l;
178                         leftParent = bestLeft[l];
179                         rightParent = bestRight[alignLength - l - 1];
180                 }
181         }
182         
183         return bestChimeraMismatches;
184 }
185
186 /**************************************************************************************************/
187
188 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
189         
190         int bestTrimeraMismatches = MAXINT;
191         
192         leftParent = -1;
193         middleParent = -1;
194         rightParent = -1;
195         
196         breakPointA = -1;
197         breakPointB = -1;
198         
199         vector<vector<int> > minDelta(alignLength);
200         vector<vector<int> > minDeltaSeq(alignLength);
201         
202         for(int i=0;i<alignLength;i++){
203                 minDelta[i].assign(alignLength, MAXINT);
204                 minDeltaSeq[i].assign(alignLength, -1);
205         }
206         
207         for(int x=0;x<alignLength;x++){
208                 for(int y=x;y<alignLength-1;y++){
209                         
210                         for(int i=0;i<numRefSeqs;i++){
211                                 int delta = left[i][y] - left[i][x];
212                                 
213                                 if(delta <= minDelta[x][y]){
214                                         minDelta[x][y] = delta;
215                                         minDeltaSeq[x][y] = i;                                  
216                                 }                               
217                         }
218                         minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
219                         
220                         if(minDelta[x][y] < bestTrimeraMismatches){
221                                 bestTrimeraMismatches = minDelta[x][y];
222                                 
223                                 breakPointA = x;
224                                 breakPointB = y;
225                                 
226                                 leftParent = bestLeft[x];
227                                 middleParent = minDeltaSeq[x][y];
228                                 rightParent = bestRight[alignLength - y - 2];                           
229                         }
230                 }               
231         }
232         return bestTrimeraMismatches;
233 }
234
235 /**************************************************************************************************/
236
237 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
238         
239         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
240         return chimeraRefSeq;
241         
242 }
243
244 /**************************************************************************************************/
245
246 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
247         
248         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
249         
250         return chimeraRefSeq;
251 }
252
253 /**************************************************************************************************/
254
255 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
256         
257         int match = 0;
258         int mismatch = 0;
259         
260         for(int i=0;i<alignLength;i++){
261 //              if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
262                 if(chimeraRefSeq[i] != '.'){
263                         if(querySeq[i] == '-' && chimeraRefSeq[i] == '-'){      /*      do nothing      */      }
264                         else if(querySeq[i] == chimeraRefSeq[i]){
265                                 match++;
266                         }
267                         else{
268                                 mismatch++;
269                         }                       
270                 }
271         }
272         
273         return (double)mismatch / (double)(mismatch + match);   
274 }
275
276 //***************************************************************************************************************
277
278 int RefChimeraTest::getClosestRefIndex(){
279
280         return bestMatch;
281         
282 }
283
284 //***************************************************************************************************************