X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=maligner.cpp;fp=maligner.cpp;h=c5cc83a4d216b9c5a89eb6f60de8a15e5906d06c;hb=0571957d68cbbc0e425af1db8e808f826010b9e2;hp=7d7145f4ca6bd9fac7a316745ace4432e4c26fe0;hpb=77ac47e1ea0b5a0c6f55eb25e6bc7d7494ed3ad2;p=mothur.git diff --git a/maligner.cpp b/maligner.cpp index 7d7145f..c5cc83a 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -48,7 +48,9 @@ string Maligner::getResults(Sequence* q, DeCalculator* decalc) { if (m->control_pressed) { return chimera; } refSeqs = minCoverageFilter(refSeqs); - + + + if (refSeqs.size() < 2) { for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } percentIdenticalQueryChimera = 0.0; @@ -56,7 +58,7 @@ string Maligner::getResults(Sequence* q, DeCalculator* decalc) { } int chimeraPenalty = computeChimeraPenalty(); - + //cout << "chimeraPenalty = " << chimeraPenalty << endl; //fills outputResults chimera = chimeraMaligner(chimeraPenalty, decalc); @@ -89,8 +91,9 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { vector temp = refSeqs; temp.push_back(query); - verticalFilter(temp); + + //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl << refSeqs[i]->getAligned() << endl; } vector< vector > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes @@ -100,10 +103,10 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { vector trace = extractHighestPath(matrix); -// cout << "traces\n"; -// for(int i=0;igetName() << endl; -// } + //cout << "traces\n"; + //for(int i=0;igetName() << endl; + //} if (trace.size() > 1) { chimera = "yes"; } else { chimera = "no"; return chimera; } @@ -315,7 +318,7 @@ void Maligner::fillScoreMatrix(vector >& ms, vector >& ms, vector >& ms, vectorgetName(); -// for(int j=0;jgetName(); -// for(int j=0;jgetName(); -// for(int j=0;jgetName(); + for(int j=0;jgetName() << endl << seqs[i]->getAligned() << endl << endl; + if ((seqs[i]->getName() == "S000003470") || (seqs[i]->getName() == "S000383265") || (seqs[i]->getName() == "7000004128191054")) { + for(int j=0;jgetName(); + for(int j=0;j Maligner::extractHighestPath(vector > } } -// cout << endl << highestScore << '\t' << highestStruct.size() << '\t' << highestStruct[0].row << endl; + //cout << endl << highestScore << '\t' << highestStruct.size() << '\t' << highestStruct[0].row << endl; vector maxTrace; double maxPercentIdenticalQueryAntiChimera = 0; @@ -450,10 +456,10 @@ vector Maligner::extractHighestPath(vector > vector trace = mapTraceRegionsToAlignment(path, refSeqs); -// cout << "traces\n"; -// for(int j=0;jgetName() << endl; -// } + //cout << "traces\n"; + //for(int j=0;jgetName() << endl; + //} int traceStart = path[0].col; int traceEnd = path[path.size()-1].col; @@ -476,6 +482,7 @@ vector Maligner::extractHighestPath(vector > } // cout << maxPercentIdenticalQueryAntiChimera << endl; return maxTrace; + } catch(exception& e) { @@ -633,9 +640,9 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery); vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1); - vector leftScores = databaseLeft->getMegaBlastSearchScores(); + vector leftScores = databaseLeft->getSearchScores(); vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1); - vector rightScores = databaseLeft->getMegaBlastSearchScores(); + vector rightScores = databaseLeft->getSearchScores(); //if ((tempIndexesRight.size() == 0) && (tempIndexesLeft.size() == 0)) { m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to process sequence " + q->getName()); m->mothurOutEndLine(); return refResults; } @@ -676,11 +683,11 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { } lasti = i; - if (mergedResults.size() > num) { break; } + //if (mergedResults.size() > num) { break; } } //save lasti for smaller ties below - lasti++; + /*lasti++; int iSmaller = lasti; if (!(mergedResults.size() > num)) { //if we still need more results. @@ -726,7 +733,17 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { else { break; } lasti++; } + */ + for (int i = smaller.size(); i < larger.size(); i++) { + //add right if you havent already + it = seen.find(larger[i]); + if (it == seen.end()) { + mergedResults.push_back(larger[i]); + seen[larger[i]] = larger[i]; + lastLarger = largerScores[i]; + } + } numWanted = mergedResults.size(); if (mergedResults.size() < numWanted) { numWanted = mergedResults.size(); } @@ -738,6 +755,7 @@ vector Maligner::getBlastSeqs(Sequence* q, int num) { Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); refResults.push_back(temp); indexes.push_back(mergedResults[i]); + //cout << db[mergedResults[i]]->getName() << endl; } //cout << mergedResults[i] << endl; @@ -768,11 +786,15 @@ vector Maligner::getKmerSeqs(Sequence* q, int num) { vector tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, numWanted); vector tempIndexesRight = databaseRight->findClosestSequences(queryRight, numWanted); + vector scoresLeft = databaseLeft->getSearchScores(); + vector scoresRight = databaseRight->getSearchScores(); //merge results map seen; map::iterator it; - + float lastRight = scoresRight[0]; + float lastLeft = scoresLeft[0]; + //int lasti = 0; vector mergedResults; for (int i = 0; i < tempIndexesLeft.size(); i++) { //add left if you havent already @@ -780,6 +802,7 @@ vector Maligner::getKmerSeqs(Sequence* q, int num) { if (it == seen.end()) { mergedResults.push_back(tempIndexesLeft[i]); seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; + lastLeft = scoresLeft[i]; } //add right if you havent already @@ -787,16 +810,61 @@ vector Maligner::getKmerSeqs(Sequence* q, int num) { if (it == seen.end()) { mergedResults.push_back(tempIndexesRight[i]); seen[tempIndexesRight[i]] = tempIndexesRight[i]; + lastRight = scoresRight[i]; + } + + //if (mergedResults.size() > numWanted) { lasti = i; break; } //you have enough results + } + + //add in sequences with same distance as last sequence added + /*lasti++; + int i = lasti; + while (i < tempIndexesLeft.size()) { + if (scoresLeft[i] == lastLeft) { + it = seen.find(tempIndexesLeft[i]); + + if (it == seen.end()) { + mergedResults.push_back(tempIndexesLeft[i]); + seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; + } + } + else { break; } + i++; + } + + // cout << "lastRight\t" << lastRight << endl; + //add in sequences with same distance as last sequence added + i = lasti; + while (i < tempIndexesRight.size()) { + if (scoresRight[i] == lastRight) { + it = seen.find(tempIndexesRight[i]); + + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[i]); + seen[tempIndexesRight[i]] = tempIndexesRight[i]; + } } + else { break; } + i++; + }*/ + + numWanted = mergedResults.size(); + + if (numWanted > mergedResults.size()) { + //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); + numWanted = mergedResults.size(); } + //cout << q->getName() << endl; vector refResults; for (int i = 0; i < numWanted; i++) { //cout << db[mergedResults[i]]->getName() << endl; - Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); - refResults.push_back(temp); - indexes.push_back(mergedResults[i]); + if (db[mergedResults[i]]->getName() != q->getName()) { + Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); + refResults.push_back(temp); + indexes.push_back(mergedResults[i]); + } } //cout << endl; delete queryRight;