5 * Created by Pat Schloss on 1/31/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "refchimeratest.h"
13 int MAXINT = numeric_limits<int>::max();
15 //***************************************************************************************************************
17 RefChimeraTest::RefChimeraTest(vector<Sequence>& refs, string chimeraReportFileName){
19 m = MothurOut::getInstance();
21 m->openOutputFile(chimeraReportFileName, chimeraReportFile);
22 numRefSeqs = refs.size();
24 referenceSeqs.resize(numRefSeqs);
25 referenceNames.resize(numRefSeqs);
26 for(int i=0;i<numRefSeqs;i++){
27 referenceSeqs[i] = refs[i].getAligned();
28 referenceNames[i] = refs[i].getName();
31 alignLength = referenceSeqs[0].length();
33 chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\t";
34 chimeraReportFile << "leftParentChi,middleParentTri,rightParentChi\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl;
38 //***************************************************************************************************************
40 int RefChimeraTest::analyzeQuery(string queryName, string querySeq){
42 vector<vector<int> > left(numRefSeqs);
43 vector<int> singleLeft, bestLeft;
44 vector<int> singleRight, bestRight;
46 vector<vector<int> > right(numRefSeqs);
47 for(int i=0;i<numRefSeqs;i++){
48 left[i].assign(alignLength, 0);
52 int bestSequenceMismatch = getMismatches(querySeq, left, right, bestMatch);
54 int leftParentBi, rightParentBi, breakPointBi;
55 int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight);
57 int minMismatchToTrimera = MAXINT;
58 int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB;
61 string chimeraRefSeq = "";
63 if(bestSequenceMismatch - minMismatchToChimera <= 3){
65 chimeraRefSeq = referenceSeqs[bestMatch];
69 minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight);
71 if(minMismatchToChimera - minMismatchToTrimera <= 3){
73 chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi);
77 chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB);
81 double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq);
83 // double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix);
85 chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t';
86 chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t';
87 chimeraReportFile << minMismatchToChimera << '\t';
90 chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA";
93 chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera;
96 chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl;
101 /**************************************************************************************************/
103 int RefChimeraTest::getMismatches(string& querySeq, vector<vector<int> >& left, vector<vector<int> >& right, int& bestRefSeq){
105 int bestSequenceMismatch = MAXINT;
107 for(int i=0;i<numRefSeqs;i++){
110 for(int l=0;l<alignLength;l++){
111 if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
119 for(int l=alignLength-1;l>=0;l--){
120 if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){
123 right[i][index++] = rDiffs;
125 if(lDiffs < bestSequenceMismatch){
126 bestSequenceMismatch = lDiffs;
130 return bestSequenceMismatch;
133 /**************************************************************************************************/
135 int RefChimeraTest::getChimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& rightParent, int& breakPoint, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
137 singleLeft.resize(alignLength, MAXINT);
138 bestLeft.resize(alignLength, -1);
140 for(int l=0;l<alignLength;l++){
141 for(int i=0;i<numRefSeqs;i++){
142 if(left[i][l] <= singleLeft[l]){
143 singleLeft[l] = left[i][l];
149 singleRight.resize(alignLength, MAXINT);
150 bestRight.resize(alignLength, -1);
152 for(int l=0;l<alignLength;l++){
153 for(int i=0;i<numRefSeqs;i++){
154 if(right[i][l] <= singleRight[l]){
155 singleRight[l] = right[i][l];
161 int bestChimeraMismatches = MAXINT;
166 for(int l=0;l<alignLength;l++){
167 int chimera = singleLeft[l] + singleRight[alignLength - l - 1];
168 if(chimera < bestChimeraMismatches){
169 bestChimeraMismatches = chimera;
171 leftParent = bestLeft[l];
172 rightParent = bestRight[alignLength - l - 1];
176 return bestChimeraMismatches;
179 /**************************************************************************************************/
181 int RefChimeraTest::getTrimera(vector<vector<int> >& left, vector<vector<int> >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector<int>& singleLeft, vector<int>& bestLeft, vector<int>& singleRight, vector<int>& bestRight){
183 int bestTrimeraMismatches = MAXINT;
192 vector<vector<int> > minDelta(alignLength);
193 vector<vector<int> > minDeltaSeq(alignLength);
195 for(int i=0;i<alignLength;i++){
196 minDelta[i].assign(alignLength, MAXINT);
197 minDeltaSeq[i].assign(alignLength, -1);
200 for(int x=0;x<alignLength;x++){
201 for(int y=x;y<alignLength-1;y++){
203 for(int i=0;i<numRefSeqs;i++){
204 int delta = left[i][y] - left[i][x];
206 if(delta <= minDelta[x][y]){
207 minDelta[x][y] = delta;
208 minDeltaSeq[x][y] = i;
211 minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
213 if(minDelta[x][y] < bestTrimeraMismatches){
214 bestTrimeraMismatches = minDelta[x][y];
219 leftParent = bestLeft[x];
220 middleParent = minDeltaSeq[x][y];
221 rightParent = bestRight[alignLength - y - 2];
225 return bestTrimeraMismatches;
228 /**************************************************************************************************/
230 string RefChimeraTest::stitchBimera(int leftParent, int rightParent, int breakPoint){
232 string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPoint) + referenceSeqs[rightParent].substr(breakPoint);
233 return chimeraRefSeq;
237 /**************************************************************************************************/
239 string RefChimeraTest::stitchTrimera(int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB){
241 string chimeraRefSeq = referenceSeqs[leftParent].substr(0, breakPointA) + referenceSeqs[middleParent].substr(breakPointA, breakPointB-breakPointA) + referenceSeqs[rightParent].substr(breakPointB);
243 return chimeraRefSeq;
246 /**************************************************************************************************/
248 double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq){
253 for(int i=0;i<alignLength;i++){
254 if(querySeq[i] != '.' && chimeraRefSeq[i] != '.'){
255 if(querySeq[i] == '-' && chimeraRefSeq[i] == '-'){ /* do nothing */ }
256 else if(querySeq[i] == chimeraRefSeq[i]){
265 return (double)mismatch / (double)(mismatch + match);
268 //***************************************************************************************************************
270 int RefChimeraTest::getClosestRefIndex(){
276 //***************************************************************************************************************