5 * Created by Pat Schloss on 1/31/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "refchimeratest.h"
13 int MAXINT = numeric_limits<int>::max();
15 //***************************************************************************************************************
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs){
19 m = MothurOut::getInstance();
21 numRefSeqs = refs.size();
23 referenceSeqs.resize(numRefSeqs);
24 referenceNames.resize(numRefSeqs);
25 for(int i=0;i<numRefSeqs;i++){
26 referenceSeqs[i] = refs[i].getAligned();
27 referenceNames[i] = refs[i].getName();
30 alignLength = referenceSeqs[0].length();
34 //***************************************************************************************************************
36 int RefChimeraTest::printHeader(ofstream& chimeraReportFile){
38 chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl;
40 }catch(exception& e) {
41 m->errorOut(e, "RefChimeraTest", "execute");
45 //***************************************************************************************************************
47 int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& chimeraReportFile){
49 vector<vector<int> > left(numRefSeqs);
50 vector<int> singleLeft, bestLeft;
51 vector<int> singleRight, bestRight;
53 vector<vector<int> > right(numRefSeqs);
54 for(int i=0;i<numRefSeqs;i++){
55 left[i].assign(alignLength, 0);
59 int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
61 int leftParentBi, rightParentBi, breakPointBi;
62 int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
64 // int minMismatchToTrimera = MAXINT;
65 // int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
68 string chimeraRefSeq = "";
70 if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){
73 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
75 // minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight);
77 // if(minMismatchToChimera - minMismatchToTrimera <= 3){
79 // chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
83 // chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB);
89 chimeraRefSeq = referenceSeqs[bestMatch];
93 double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
95 // double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);
97 chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
98 chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
99 chimeraReportFile << minMismatchToChimera << '\t';
102 // chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA";
105 // chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera;
108 chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
113 /**************************************************************************************************/
115 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
117 int bestSequenceMismatch = MAXINT;
119 for(int i=0;i<numRefSeqs;i++){
123 for(int l=0;l<alignLength;l++){
124 // if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
126 if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
134 for(int l=alignLength-1;l>=0;l--){
135 // if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
136 if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){
139 right[i][index++] = rDiffs;
142 if(lDiffs < bestSequenceMismatch){
143 bestSequenceMismatch = lDiffs;
147 return bestSequenceMismatch;
150 /**************************************************************************************************/
152 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
154 singleLeft.resize(alignLength, MAXINT);
155 bestLeft.resize(alignLength, -1);
157 for(int l=0;l<alignLength;l++){
158 for(int i=0;i<numRefSeqs;i++){
159 if(left[i][l] <= singleLeft[l]){
160 singleLeft[l] = left[i][l];
166 singleRight.resize(alignLength, MAXINT);
167 bestRight.resize(alignLength, -1);
169 for(int l=0;l<alignLength;l++){
170 for(int i=0;i<numRefSeqs;i++){
171 if(right[i][l] <= singleRight[l]){
172 singleRight[l] = right[i][l];
178 int bestChimeraMismatches = MAXINT;
183 for(int l=0;l<alignLength;l++){
184 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
185 if(chimera < bestChimeraMismatches){
186 bestChimeraMismatches = chimera;
188 leftParent = bestLeft[l];
189 rightParent = bestRight[alignLength - l - 1];
193 return bestChimeraMismatches;
196 /**************************************************************************************************/
198 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
200 int bestTrimeraMismatches = MAXINT;
209 vector<vector<int> > minDelta(alignLength);
210 vector<vector<int> > minDeltaSeq(alignLength);
212 for(int i=0;i<alignLength;i++){
213 minDelta[i].assign(alignLength, MAXINT);
214 minDeltaSeq[i].assign(alignLength, -1);
217 for(int x=0;x<alignLength;x++){
218 for(int y=x;y<alignLength-1;y++){
220 for(int i=0;i<numRefSeqs;i++){
221 int delta = left[i][y] - left[i][x];
223 if(delta <= minDelta[x][y]){
224 minDelta[x][y] = delta;
225 minDeltaSeq[x][y] = i;
228 minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
230 if(minDelta[x][y] < bestTrimeraMismatches){
231 bestTrimeraMismatches = minDelta[x][y];
236 leftParent = bestLeft[x];
237 middleParent = minDeltaSeq[x][y];
238 rightParent = bestRight[alignLength - y - 2];
242 return bestTrimeraMismatches;
245 /**************************************************************************************************/
247 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
249 string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
250 return chimeraRefSeq;
254 /**************************************************************************************************/
256 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
258 string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
260 return chimeraRefSeq;
263 /**************************************************************************************************/
265 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
270 for(int i=0;i<alignLength;i++){
271 // if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
272 if(chimeraRefSeq[i] != '.' && querySeq[i] != '.'){
273 if(querySeq[i] == '-' && chimeraRefSeq[i] == '-' && chimeraRefSeq[i] != 'N'){ /* do nothing */ }
274 else if(querySeq[i] == chimeraRefSeq[i]){
283 return (double)mismatch / (double)(mismatch + match);
286 //***************************************************************************************************************
288 int RefChimeraTest::getClosestRefIndex(){
294 //***************************************************************************************************************