X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=maligner.cpp;h=252b453cb6989868f65ceb71e59a27587d9baa7a;hb=836150c0a3666899ad58426388f4999d6cf8829a;hp=18f740911b2ccf342c4893cb00f629da95e7307b;hpb=5f07b42548368731cedeb92cbf3e79f630b4e20f;p=mothur.git diff --git a/maligner.cpp b/maligner.cpp index 18f7409..252b453 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -106,7 +106,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { // } if (trace.size() > 1) { chimera = "yes"; } - else { chimera = "no"; } + else { chimera = "no"; return chimera; } int traceStart = trace[0].col; int traceEnd = trace[trace.size()-1].oldCol; @@ -633,20 +633,30 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery); vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1); + vector leftScores = databaseLeft->getMegaBlastSearchScores(); vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1); + vector rightScores = databaseLeft->getMegaBlastSearchScores(); //if ((tempIndexesRight.size() == 0) && (tempIndexesLeft.size() == 0)) { m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to process sequence " + q->getName()); m->mothurOutEndLine(); return refResults; } vector smaller; + vector smallerScores; vector larger; + vector largerScores; - if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } - else { smaller = tempIndexesLeft; larger = tempIndexesRight; } + if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; smallerScores = rightScores; larger = tempIndexesLeft; largerScores = leftScores; } + else { smaller = tempIndexesLeft; smallerScores = leftScores; larger = tempIndexesRight; largerScores = rightScores; } + + //for (int i = 0; i < smaller.size(); i++) { cout << "smaller = " << smaller[i] << '\t' << smallerScores[i] << endl; } + //cout << endl; + //for (int i = 0; i < larger.size(); i++) { cout << "larger = " << larger[i] << '\t' << largerScores[i] << endl; } //merge results map seen; map::iterator it; - + float lastSmaller = smallerScores[0]; + float lastLarger = largerScores[0]; + int lasti = 0; vector mergedResults; for (int i = 0; i < smaller.size(); i++) { //add left if you havent already @@ -654,6 +664,7 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { if (it == seen.end()) { mergedResults.push_back(smaller[i]); seen[smaller[i]] = smaller[i]; + lastSmaller = smallerScores[i]; } //add right if you havent already @@ -661,17 +672,63 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { if (it == seen.end()) { mergedResults.push_back(larger[i]); seen[larger[i]] = larger[i]; + lastLarger = largerScores[i]; } + + lasti = i; + if (mergedResults.size() > num) { break; } } - for (int i = smaller.size(); i < larger.size(); i++) { - it = seen.find(larger[i]); - if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; + //save lasti for smaller ties below + lasti++; + int iSmaller = lasti; + + if (!(mergedResults.size() > num)) { //if we still need more results. + for (int i = smaller.size(); i < larger.size(); i++) { + it = seen.find(larger[i]); + if (it == seen.end()) { + mergedResults.push_back(larger[i]); + seen[larger[i]] = larger[i]; + lastLarger = largerScores[i]; + } + + lasti = i; + if (mergedResults.size() > num) { break; } } } + + //add in any ties from smaller + while (iSmaller < smaller.size()) { + if (smallerScores[iSmaller] == lastSmaller) { + it = seen.find(smaller[iSmaller]); + + if (it == seen.end()) { + mergedResults.push_back(smaller[iSmaller]); + seen[smaller[iSmaller]] = smaller[iSmaller]; + } + } + else { break; } + iSmaller++; + } + + lasti++; + //add in any ties from larger + while (lasti < larger.size()) { + if (largerScores[lasti] == lastLarger) { //is it a tie + it = seen.find(larger[lasti]); + + if (it == seen.end()) { //we haven't already seen it + mergedResults.push_back(larger[lasti]); + seen[larger[lasti]] = larger[lasti]; + } + } + else { break; } + lasti++; + } + + numWanted = seen.size(); + if (mergedResults.size() < numWanted) { numWanted = mergedResults.size(); } //cout << q->getName() << " merged results size = " << mergedResults.size() << '\t' << "numwanted = " << numWanted << endl; for (int i = 0; i < numWanted; i++) {