]> git.donarmstrong.com Git - mothur.git/blob - refchimeratest.cpp
Working on seq.error changes
[mothur.git] / refchimeratest.cpp
1 /*
2  *  refchimeratest.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 1/31/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "refchimeratest.h"
11 #include "mothur.h"
12
13 int MAXINT = numeric_limits<int>::max();
14
15 //***************************************************************************************************************
16
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, bool aligned) : aligned(aligned){
18
19         m = MothurOut::getInstance();
20
21         numRefSeqs = refs.size();
22
23         referenceSeqs.resize(numRefSeqs);
24         referenceNames.resize(numRefSeqs);
25         for(int i=0;i<numRefSeqs;i++){
26                 referenceSeqs[i] = refs[i].getAligned();
27                 referenceNames[i] = refs[i].getName();
28         }
29         
30         alignLength = referenceSeqs[0].length();
31
32 }
33 //***************************************************************************************************************
34
35 int RefChimeraTest::printHeader(ofstream& chimeraReportFile){
36         try {
37                 chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
38                 return 0; 
39         }catch(exception& e) {
40                 m->errorOut(e, "RefChimeraTest", "execute");
41                 exit(1);
42         }
43 }
44 //***************************************************************************************************************
45
46 int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& chimeraReportFile){
47         
48         vector<vector<int> > left(numRefSeqs);
49         vector<int> singleLeft, bestLeft;
50         vector<int> singleRight, bestRight;
51         
52         vector<vector<int> > right(numRefSeqs);
53         for(int i=0;i<numRefSeqs;i++){
54                 left[i].assign(alignLength, 0);
55         }
56         right = left;
57         
58         int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
59         
60         int leftParentBi, rightParentBi, breakPointBi;
61         int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
62         
63         int nMera = 0;
64         string chimeraRefSeq = "";
65         
66         if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
67                 nMera = 2;
68                 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
69         }
70         else{
71                 nMera = 1;
72                 chimeraRefSeq = referenceSeqs[bestMatch];
73         }
74         
75         
76         double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
77         
78         chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
79         chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
80         chimeraReportFile << minMismatchToChimera << '\t';
81     chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
82                 
83         return nMera;
84 }
85
86 /**************************************************************************************************/
87
88 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
89         
90         int bestSequenceMismatch = MAXINT;
91         
92         for(int i=0;i<numRefSeqs;i++){
93                 
94                 int lDiffs = 0;
95                 
96                 for(int l=0;l<alignLength;l++){
97                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
98                                 lDiffs++;
99                         }
100                         left[i][l] = lDiffs;
101                 }
102                 
103                 int rDiffs = 0;
104                 int index = 0;
105                 for(int l=alignLength-1;l>=0;l--){
106                         if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
107                                 rDiffs++;
108                         }                       
109                         right[i][index++] = rDiffs;
110                 }
111                 
112                 if(lDiffs < bestSequenceMismatch){
113                         bestSequenceMismatch = lDiffs;
114                         bestRefSeq = i;
115                 }
116         }
117         return bestSequenceMismatch;
118 }
119
120 /**************************************************************************************************/
121
122 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
123         
124         singleLeft.resize(alignLength, MAXINT);
125         bestLeft.resize(alignLength, -1);
126         
127         for(int l=0;l<alignLength;l++){
128                 for(int i=0;i<numRefSeqs;i++){
129                         if(left[i][l] <= singleLeft[l]){
130                                 singleLeft[l] = left[i][l];
131                                 bestLeft[l] = i;
132                         }
133                 }
134         }
135         
136         singleRight.resize(alignLength, MAXINT);
137         bestRight.resize(alignLength, -1);
138         
139         for(int l=0;l<alignLength;l++){
140                 for(int i=0;i<numRefSeqs;i++){
141                         if(right[i][l] <= singleRight[l]){
142                                 singleRight[l] = right[i][l];
143                                 bestRight[l] = i;
144                         }
145                 }
146         }
147         
148         int bestChimeraMismatches = MAXINT;
149         leftParent = -1;
150         rightParent = -1;
151         breakPoint = -1;
152         
153         for(int l=0;l<alignLength;l++){
154                 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
155                 if(chimera < bestChimeraMismatches){
156                         bestChimeraMismatches = chimera;
157                         breakPoint = l;
158                         leftParent = bestLeft[l];
159                         rightParent = bestRight[alignLength - l - 1];
160                 }
161         }
162         
163         return bestChimeraMismatches;
164 }
165
166 /**************************************************************************************************/
167
168 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
169         
170         int bestTrimeraMismatches = MAXINT;
171         
172         leftParent = -1;
173         middleParent = -1;
174         rightParent = -1;
175         
176         breakPointA = -1;
177         breakPointB = -1;
178         
179         vector<vector<int> > minDelta(alignLength);
180         vector<vector<int> > minDeltaSeq(alignLength);
181         
182         for(int i=0;i<alignLength;i++){
183                 minDelta[i].assign(alignLength, MAXINT);
184                 minDeltaSeq[i].assign(alignLength, -1);
185         }
186         
187         for(int x=0;x<alignLength;x++){
188                 for(int y=x;y<alignLength-1;y++){
189                         
190                         for(int i=0;i<numRefSeqs;i++){
191                                 int delta = left[i][y] - left[i][x];
192                                 
193                                 if(delta <= minDelta[x][y]){
194                                         minDelta[x][y] = delta;
195                                         minDeltaSeq[x][y] = i;                                  
196                                 }                               
197                         }
198                         minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
199                         
200                         if(minDelta[x][y] < bestTrimeraMismatches){
201                                 bestTrimeraMismatches = minDelta[x][y];
202                                 
203                                 breakPointA = x;
204                                 breakPointB = y;
205                                 
206                                 leftParent = bestLeft[x];
207                                 middleParent = minDeltaSeq[x][y];
208                                 rightParent = bestRight[alignLength - y - 2];                           
209                         }
210                 }               
211         }
212         return bestTrimeraMismatches;
213 }
214
215 /**************************************************************************************************/
216
217 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
218         
219         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
220         return chimeraRefSeq;
221         
222 }
223
224 /**************************************************************************************************/
225
226 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
227         
228         string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
229         
230         return chimeraRefSeq;
231 }
232
233 /**************************************************************************************************/
234
235 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
236         
237         int match = 0;
238         int mismatch = 0;
239         
240         for(int i=0;i<alignLength;i++){
241 //              if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
242                 if(chimeraRefSeq[i] != '.' && querySeq[i] != '.'){
243                         if(querySeq[i] == '-' && chimeraRefSeq[i] == '-' && chimeraRefSeq[i] != 'N'){   /*      do nothing      */      }
244                         else if(querySeq[i] == chimeraRefSeq[i]){
245                                 match++;
246                         }
247                         else{
248                                 mismatch++;
249                         }                       
250                 }
251         }
252         
253         return (double)mismatch / (double)(mismatch + match);   
254 }
255
256 //***************************************************************************************************************
257
258 int RefChimeraTest::getClosestRefIndex(){
259
260         return bestMatch;
261         
262 }
263
264 //***************************************************************************************************************