5 * Created by Pat Schloss on 1/31/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "refchimeratest.h"
13 int MAXINT = numeric_limits<int>::max();
15 //***************************************************************************************************************
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, bool aligned) : aligned(aligned){
19 m = MothurOut::getInstance();
21 numRefSeqs = refs.size();
23 referenceSeqs.resize(numRefSeqs);
24 referenceNames.resize(numRefSeqs);
25 for(int i=0;i<numRefSeqs;i++){
26 referenceSeqs[i] = refs[i].getAligned();
27 referenceNames[i] = refs[i].getName();
30 alignLength = referenceSeqs[0].length();
33 //***************************************************************************************************************
35 int RefChimeraTest::printHeader(ofstream& chimeraReportFile){
37 chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
39 }catch(exception& e) {
40 m->errorOut(e, "RefChimeraTest", "execute");
44 //***************************************************************************************************************
46 int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& chimeraReportFile){
48 vector<vector<int> > left(numRefSeqs);
49 vector<int> singleLeft, bestLeft;
50 vector<int> singleRight, bestRight;
52 vector<vector<int> > right(numRefSeqs);
53 for(int i=0;i<numRefSeqs;i++){
54 left[i].assign(alignLength, 0);
58 int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
60 int leftParentBi, rightParentBi, breakPointBi;
61 int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
64 string chimeraRefSeq = "";
66 if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
68 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
72 chimeraRefSeq = referenceSeqs[bestMatch];
76 double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
78 chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
79 chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
80 chimeraReportFile << minMismatchToChimera << '\t';
81 chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
86 /**************************************************************************************************/
88 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
90 int bestSequenceMismatch = MAXINT;
92 for(int i=0;i<numRefSeqs;i++){
96 for(int l=0;l<alignLength;l++){
97 if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
105 for(int l=alignLength-1;l>=0;l--){
106 if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
109 right[i][index++] = rDiffs;
112 if(lDiffs < bestSequenceMismatch){
113 bestSequenceMismatch = lDiffs;
117 return bestSequenceMismatch;
120 /**************************************************************************************************/
122 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
124 singleLeft.resize(alignLength, MAXINT);
125 bestLeft.resize(alignLength, -1);
127 for(int l=0;l<alignLength;l++){
128 for(int i=0;i<numRefSeqs;i++){
129 if(left[i][l] <= singleLeft[l]){
130 singleLeft[l] = left[i][l];
136 singleRight.resize(alignLength, MAXINT);
137 bestRight.resize(alignLength, -1);
139 for(int l=0;l<alignLength;l++){
140 for(int i=0;i<numRefSeqs;i++){
141 if(right[i][l] <= singleRight[l]){
142 singleRight[l] = right[i][l];
148 int bestChimeraMismatches = MAXINT;
153 for(int l=0;l<alignLength;l++){
154 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
155 if(chimera < bestChimeraMismatches){
156 bestChimeraMismatches = chimera;
158 leftParent = bestLeft[l];
159 rightParent = bestRight[alignLength - l - 1];
163 return bestChimeraMismatches;
166 /**************************************************************************************************/
168 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
170 int bestTrimeraMismatches = MAXINT;
179 vector<vector<int> > minDelta(alignLength);
180 vector<vector<int> > minDeltaSeq(alignLength);
182 for(int i=0;i<alignLength;i++){
183 minDelta[i].assign(alignLength, MAXINT);
184 minDeltaSeq[i].assign(alignLength, -1);
187 for(int x=0;x<alignLength;x++){
188 for(int y=x;y<alignLength-1;y++){
190 for(int i=0;i<numRefSeqs;i++){
191 int delta = left[i][y] - left[i][x];
193 if(delta <= minDelta[x][y]){
194 minDelta[x][y] = delta;
195 minDeltaSeq[x][y] = i;
198 minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
200 if(minDelta[x][y] < bestTrimeraMismatches){
201 bestTrimeraMismatches = minDelta[x][y];
206 leftParent = bestLeft[x];
207 middleParent = minDeltaSeq[x][y];
208 rightParent = bestRight[alignLength - y - 2];
212 return bestTrimeraMismatches;
215 /**************************************************************************************************/
217 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
219 string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
220 return chimeraRefSeq;
224 /**************************************************************************************************/
226 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
228 string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
230 return chimeraRefSeq;
233 /**************************************************************************************************/
235 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
240 for(int i=0;i<alignLength;i++){
241 // if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
242 if(chimeraRefSeq[i] != '.' && querySeq[i] != '.'){
243 if(querySeq[i] == '-' && chimeraRefSeq[i] == '-' && chimeraRefSeq[i] != 'N'){ /* do nothing */ }
244 else if(querySeq[i] == chimeraRefSeq[i]){
253 return (double)mismatch / (double)(mismatch + match);
256 //***************************************************************************************************************
258 int RefChimeraTest::getClosestRefIndex(){
264 //***************************************************************************************************************