From d8ed98e71c2da5b39b8a778e2c694a4ddff677eb Mon Sep 17 00:00:00 2001 From: pschloss Date: Mon, 18 Apr 2011 15:01:44 +0000 Subject: [PATCH] tie breaker fix to chimera.slayer --- chimera.h | 1 + chimeraslayer.cpp | 10 ++- decalc.cpp | 7 +- maligner.cpp | 200 +++++++++++++++++++++++++++++++++++++--------- maligner.h | 3 +- 5 files changed, 178 insertions(+), 43 deletions(-) diff --git a/chimera.h b/chimera.h index 2f8ce61..f827346 100644 --- a/chimera.h +++ b/chimera.h @@ -77,6 +77,7 @@ struct score_struct { int score; int row; int col; +// int mismatches; }; /***********************************************************************/ struct trace_struct { diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 61ad0e0..77a8695 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -780,7 +780,10 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { //*************************************************************************************************************** int ChimeraSlayer::getChimeras(Sequence* query) { try { - if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); printResults.trimQuery = *trimQuery; } + if (trimChimera) { + trimQuery = new Sequence(query->getName(), query->getAligned()); + printResults.trimQuery = *trimQuery; + } chimeraFlags = "no"; printResults.flag = "no"; @@ -794,7 +797,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) { //you must create a template vector thisTemplate; if (templateFileName != "self") { thisTemplate = templateSeqs; } - else { thisTemplate = getTemplate(query); } //fills thistemplate and creates the databases + else { thisTemplate = getTemplate(query); } //fills this template and creates the databases if (m->control_pressed) { return 0; } @@ -812,10 +815,11 @@ int ChimeraSlayer::getChimeras(Sequence* query) { if (m->control_pressed) { return 0; } string chimeraFlag = maligner.getResults(query, decalc); + if (m->control_pressed) { return 0; } + vector Results = maligner.getOutput(); - //found in testing realigning only made things worse if (realign) { ChimeraReAligner realigner(thisTemplate, match, misMatch); realigner.reAlign(query, Results); diff --git a/decalc.cpp b/decalc.cpp index 2e577d2..f0b1ecc 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -780,7 +780,7 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector seen; map::iterator it; @@ -827,12 +827,15 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector dists.size()) { //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); - numWanted = dists.size(); } + numWanted = dists.size(); + } //cout << numWanted << endl; for (int i = 0; i < numWanted; i++) { //cout << dists[i].seq->getName() << '\t' << dists[i].dist << endl; + Sequence* temp = new Sequence(db[dists[i].index]->getName(), db[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. + seqsMatches.push_back(temp); indexes.push_back(dists[i].index); } diff --git a/maligner.cpp b/maligner.cpp index 7f0cd0a..eaaaa87 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -13,7 +13,18 @@ /***********************************************************************/ Maligner::Maligner(vector temp, int num, int match, int misMatch, float div, int ms, int minCov, string mode, Database* dataLeft, Database* dataRight) : - db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov), searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) { m = MothurOut::getInstance(); } + db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov), searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) { + + + m = MothurOut::getInstance(); + +// cout << matchScore << '\t' << misMatchPenalty << endl; +// +// matchScore = 1; +// misMatchPenalty = -1; + + } + /***********************************************************************/ string Maligner::getResults(Sequence* q, DeCalculator* decalc) { try { @@ -79,7 +90,6 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { temp.push_back(query); verticalFilter(temp); -//for (int i = 0; i < temp.size(); i++) { cout << temp[i]->getName() << '\n' << temp[i]->getAligned() << endl; } return "no"; vector< vector > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes @@ -87,23 +97,20 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { fillScoreMatrix(matrix, refSeqs, chimeraPenalty); - vector path = extractHighestPath(matrix); - - if (m->control_pressed) { return chimera; } + vector trace = extractHighestPath(matrix); + +// cout << "traces\n"; +// for(int i=0;igetName() << endl; +// } - vector trace = mapTraceRegionsToAlignment(path, refSeqs); - if (trace.size() > 1) { chimera = "yes"; } else { chimera = "no"; } - int traceStart = path[0].col; - int traceEnd = path[path.size()-1].col; + int traceStart = trace[0].col; + int traceEnd = trace[trace.size()-1].oldCol; string queryInRange = query->getAligned(); queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); - - string chimeraSeq = constructChimericSeq(trace, refSeqs); - - percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); if (m->control_pressed) { return chimera; } @@ -127,7 +134,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { temp.queryToParent = computePercentID(queryInRange, parentInRange); temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); - + string queryInRegion = query->getAligned(); queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); @@ -138,7 +145,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { outputResults.push_back(temp); } - + return chimera; } catch(exception& e) { @@ -196,9 +203,11 @@ int Maligner::computeChimeraPenalty() { try { int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases()); - + +// if(numAllowable < 2){ numAllowable = 2; } + int penalty = int(numAllowable + 1) * misMatchPenalty; - + return penalty; } @@ -263,9 +272,9 @@ void Maligner::verticalFilter(vector seqs) { vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { try{ - vector< vector > m; m.resize(rows); + vector< vector > m(rows); - for (int i = 0; i < m.size(); i++) { + for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { //initialize each cell @@ -286,7 +295,9 @@ vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { exit(1); } } + //*************************************************************************************************************** + void Maligner::fillScoreMatrix(vector >& ms, vector seqs, int penalty) { try{ @@ -302,10 +313,13 @@ void Maligner::fillScoreMatrix(vector >& ms, vector >& ms, vector >& ms, vectorgetName(); +// for(int j=0;jgetName(); +// for(int j=0;jgetName(); +// for(int j=0;jerrorOut(e, "Maligner", "fillScoreMatrix"); exit(1); } } + //*************************************************************************************************************** -vector Maligner::extractHighestPath(vector > ms) { + +vector Maligner::extractHighestPath(vector > ms) { try { + //get matrix dimensions int numCols = query->getAligned().length(); int numRows = ms.size(); //find highest score scoring matrix - score_struct highestStruct; + vector highestStruct; int highestScore = 0; for (int i = 0; i < numRows; i++) { for (int j = 0; j < numCols; j++) { if (ms[i][j].score > highestScore) { highestScore = ms[i][j].score; - highestStruct = ms[i][j]; + highestStruct.resize(0); + highestStruct.push_back(ms[i][j]); + } + else if(ms[i][j].score == highestScore){ + highestStruct.push_back(ms[i][j]); } } } - - vector path; + +// cout << highestScore << '\t' << highestStruct.size() << endl; - int rowIndex = highestStruct.row; - int pos = highestStruct.col; - int score = highestStruct.score; + vector maxTrace; + double maxPercentIdenticalQueryAntiChimera = 0; + int maxIndex = -1; + + for(int i=0;i path; + + int rowIndex = highestStruct[i].row; + int pos = highestStruct[i].col; + int score = highestStruct[i].score; + + while (pos >= 0 && score > 0) { + score_struct temp = ms[rowIndex][pos]; + score = temp.score; + + if (score > 0) { path.push_back(temp); } + + rowIndex = temp.prev; + pos--; + } + + reverse(path.begin(), path.end()); + + vector trace = mapTraceRegionsToAlignment(path, refSeqs); - while (pos >= 0 && score > 0) { - score_struct temp = ms[rowIndex][pos]; - score = temp.score; +// cout << "traces\n"; +// for(int j=0;jgetName() << endl; +// } - if (score > 0) { path.push_back(temp); } +// Need to do something with this in a bit... +// if (trace.size() > 1) { chimera = "yes"; } +// else { chimera = "no"; } - rowIndex = temp.prev; - pos--; + int traceStart = path[0].col; + int traceEnd = path[path.size()-1].col; +// cout << "traceStart/End\t" << traceStart << '\t' << traceEnd << endl; + + string queryInRange = query->getAligned(); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); +// cout << "here" << endl; + string chimeraSeq = constructChimericSeq(trace, refSeqs); + string antiChimeraSeq = constructAntiChimericSeq(trace, refSeqs); + + percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); + double percentIdenticalQueryAntiChimera = computePercentID(queryInRange, antiChimeraSeq); +// cout << i << '\t' << percentIdenticalQueryChimera << '\t' << percentIdenticalQueryAntiChimera << endl; + + if(percentIdenticalQueryAntiChimera > maxPercentIdenticalQueryAntiChimera){ + maxPercentIdenticalQueryAntiChimera = percentIdenticalQueryAntiChimera; + maxTrace = trace; + maxIndex = i; + } } - - reverse(path.begin(), path.end()); - - return path; +// cout << maxPercentIdenticalQueryAntiChimera << '\t' << maxIndex << endl; + return maxTrace; } catch(exception& e) { @@ -399,7 +487,9 @@ vector Maligner::extractHighestPath(vector > exit(1); } } + //*************************************************************************************************************** + vector Maligner::mapTraceRegionsToAlignment(vector path, vector seqs) { try { vector trace; @@ -434,6 +524,13 @@ vector Maligner::mapTraceRegionsToAlignment(vector p temp.row = region_index; trace.push_back(temp); +// cout << endl; +// cout << trace.size() << endl; +// for(int i=0;igetName() << endl; +// } +// cout << endl; + return trace; } @@ -442,12 +539,16 @@ vector Maligner::mapTraceRegionsToAlignment(vector p exit(1); } } + //*************************************************************************************************************** + string Maligner::constructChimericSeq(vector trace, vector seqs) { try { string chimera = ""; for (int i = 0; i < trace.size(); i++) { +// cout << i << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl; + string seqAlign = seqs[trace[i].row]->getAligned(); seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); chimera += seqAlign; @@ -460,6 +561,31 @@ string Maligner::constructChimericSeq(vector trace, vector trace, vector seqs) { + try { + string antiChimera = ""; + + for (int i = 0; i < trace.size(); i++) { +// cout << i << '\t' << (trace.size() - i - 1) << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl; + + int oppositeIndex = trace.size() - i - 1; + + string seqAlign = seqs[trace[oppositeIndex].row]->getAligned(); + seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); + antiChimera += seqAlign; + } + + return antiChimera; + } + catch(exception& e) { + m->errorOut(e, "Maligner", "constructChimericSeq"); + exit(1); + } +} + //*************************************************************************************************************** float Maligner::computePercentID(string queryAlign, string chimera) { try { diff --git a/maligner.h b/maligner.h index a4c959b..6ddd6ac 100644 --- a/maligner.h +++ b/maligner.h @@ -47,9 +47,10 @@ class Maligner { vector< vector > buildScoreMatrix(int, int); void fillScoreMatrix(vector >&, vector, int); - vector extractHighestPath(vector >); + vector extractHighestPath(vector >); vector mapTraceRegionsToAlignment(vector, vector); string constructChimericSeq(vector, vector); + string constructAntiChimericSeq(vector, vector); float computePercentID(string, string); string chimeraMaligner(int, DeCalculator*); vector getBlastSeqs(Sequence*, int); -- 2.39.2