]> git.donarmstrong.com Git - mothur.git/blob - refchimeratest.cpp
some alterations to chimera.slayer
[mothur.git] / refchimeratest.cpp
1 /*
2  *  refchimeratest.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 1/31/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "refchimeratest.h"
11 #include "mothur.h"
12
13 int MAXINT = numeric_limits<int>::max();
14
15 //***************************************************************************************************************
16
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileName){
18
19         m = MothurOut::getInstance();
20
21         m->openOutputFile(chimeraReportFileName, chimeraReportFile);
22         numRefSeqs = refs.size();
23
24         referenceSeqs.resize(numRefSeqs);
25         referenceNames.resize(numRefSeqs);
26         for(int i=0;i<numRefSeqs;i++){
27                 referenceSeqs[i] = refs[i].getAligned();
28                 referenceNames[i] = refs[i].getName();
29         }
30         
31         alignLength = referenceSeqs[0].length();
32
33         chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
34 //      chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
35
36 }
37
38 //***************************************************************************************************************
39
40 int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
41         
42         vector<vector<int> > left(numRefSeqs);
43         vector<int> singleLeft, bestLeft;
44         vector<int> singleRight, bestRight;
45         
46         vector<vector<int> > right(numRefSeqs);
47         for(int i=0;i<numRefSeqs;i++){
48                 left[i].assign(alignLength, 0);
49         }
50         right = left;
51         
52         int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
53         
54         int leftParentBi, rightParentBi, breakPointBi;
55         int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
56         
57 //      int minMismatchToTrimera = MAXINT;
58 //      int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
59         
60         int nMera = 0;
61         string chimeraRefSeq = "";
62         
63         if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
64         
65                 nMera = 2;
66                 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
67                 
68 //              minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight);
69 //
70 //              if(minMismatchToChimera - minMismatchToTrimera <= 3){
71 //                      nMera = 2;
72 //                      chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
73 //              }
74 //              else{                   
75 //                      nMera = 3;
76 //                      chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB);
77 //              }
78                 
79         }
80         else{
81                 nMera = 1;
82                 chimeraRefSeq = referenceSeqs[bestMatch];
83         }
84         
85         
86         double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
87         
88 //      double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);         
89         
90         chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
91         chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
92         chimeraReportFile << minMismatchToChimera << '\t';
93         
94 //      if(nMera == 1){
95 //              chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA";
96 //      }
97 //      else{
98 //              chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera;       
99 //      }
100         
101         chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
102                 
103         return nMera;
104 }
105
106 /**************************************************************************************************/
107
108 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
109         
110         int bestSequenceMismatch = MAXINT;
111         
112         for(int i=0;i<numRefSeqs;i++){
113                 
114                 int lDiffs = 0;
115                 
116                 for(int l=0;l<alignLength;l++){
117 //                      if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
118                                                 
119                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
120                                 lDiffs++;
121                         }
122                         left[i][l] = lDiffs;
123                 }
124                 
125                 int rDiffs = 0;
126                 int index = 0;
127                 for(int l=alignLength-1;l>=0;l--){
128 //                      if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
129                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
130                                 rDiffs++;
131                         }                       
132                         right[i][index++] = rDiffs;
133                 }
134                 
135                 if(lDiffs < bestSequenceMismatch){
136                         bestSequenceMismatch = lDiffs;
137                         bestRefSeq = i;
138                 }
139         }
140         return bestSequenceMismatch;
141 }
142
143 /**************************************************************************************************/
144
145 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
146         
147         singleLeft.resize(alignLength, MAXINT);
148         bestLeft.resize(alignLength, -1);
149         
150         for(int l=0;l<alignLength;l++){
151                 for(int i=0;i<numRefSeqs;i++){
152                         if(left[i][l] <= singleLeft[l]){
153                                 singleLeft[l] = left[i][l];
154                                 bestLeft[l] = i;
155                         }
156                 }
157         }
158         
159         singleRight.resize(alignLength, MAXINT);
160         bestRight.resize(alignLength, -1);
161         
162         for(int l=0;l<alignLength;l++){
163                 for(int i=0;i<numRefSeqs;i++){
164                         if(right[i][l] <= singleRight[l]){
165                                 singleRight[l] = right[i][l];
166                                 bestRight[l] = i;
167                         }
168                 }
169         }
170         
171         int bestChimeraMismatches = MAXINT;
172         leftParent = -1;
173         rightParent = -1;
174         breakPoint = -1;
175         
176         for(int l=0;l<alignLength;l++){
177                 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
178                 if(chimera < bestChimeraMismatches){
179                         bestChimeraMismatches = chimera;
180                         breakPoint = l;
181                         leftParent = bestLeft[l];
182                         rightParent = bestRight[alignLength - l - 1];
183                 }
184         }
185         
186         return bestChimeraMismatches;
187 }
188
189 /**************************************************************************************************/
190
191 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
192         
193         int bestTrimeraMismatches = MAXINT;
194         
195         leftParent = -1;
196         middleParent = -1;
197         rightParent = -1;
198         
199         breakPointA = -1;
200         breakPointB = -1;
201         
202         vector<vector<int> > minDelta(alignLength);
203         vector<vector<int> > minDeltaSeq(alignLength);
204         
205         for(int i=0;i<alignLength;i++){
206                 minDelta[i].assign(alignLength, MAXINT);
207                 minDeltaSeq[i].assign(alignLength, -1);
208         }
209         
210         for(int x=0;x<alignLength;x++){
211                 for(int y=x;y<alignLength-1;y++){
212                         
213                         for(int i=0;i<numRefSeqs;i++){
214                                 int delta = left[i][y] - left[i][x];
215                                 
216                                 if(delta <= minDelta[x][y]){
217                                         minDelta[x][y] = delta;
218                                         minDeltaSeq[x][y] = i;                                  
219                                 }                               
220                         }
221                         minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
222                         
223                         if(minDelta[x][y] < bestTrimeraMismatches){
224                                 bestTrimeraMismatches = minDelta[x][y];
225                                 
226                                 breakPointA = x;
227                                 breakPointB = y;
228                                 
229                                 leftParent = bestLeft[x];
230                                 middleParent = minDeltaSeq[x][y];
231                                 rightParent = bestRight[alignLength - y - 2];                           
232                         }
233                 }               
234         }
235         return bestTrimeraMismatches;
236 }
237
238 /**************************************************************************************************/
239
240 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
241         
242         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
243         return chimeraRefSeq;
244         
245 }
246
247 /**************************************************************************************************/
248
249 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
250         
251         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
252         
253         return chimeraRefSeq;
254 }
255
256 /**************************************************************************************************/
257
258 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
259         
260         int match = 0;
261         int mismatch = 0;
262         
263         for(int i=0;i<alignLength;i++){
264 //              if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
265                 if(chimeraRefSeq[i] != '.' && querySeq[i] != '.'){
266                         if(querySeq[i] == '-' && chimeraRefSeq[i] == '-' && chimeraRefSeq[i] != 'N'){   /*      do nothing      */      }
267                         else if(querySeq[i] == chimeraRefSeq[i]){
268                                 match++;
269                         }
270                         else{
271                                 mismatch++;
272                         }                       
273                 }
274         }
275         
276         return (double)mismatch / (double)(mismatch + match);   
277 }
278
279 //***************************************************************************************************************
280
281 int RefChimeraTest::getClosestRefIndex(){
282
283         return bestMatch;
284         
285 }
286
287 //***************************************************************************************************************