X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=decalc.cpp;h=c600a7f56633c270ec83810a306ad71ca3cd65c8;hb=55ec7cde88d5512e177fe9488d5ee13793853bad;hp=1442bd8ec8865f0574d338675d5899332814d8a8;hpb=0d4b21e5ccc56276b0c18d17d8e75d94ce1df4e7;p=mothur.git diff --git a/decalc.cpp b/decalc.cpp index 1442bd8..c600a7f 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -12,7 +12,7 @@ #include "dist.h" #include "eachgapdist.h" #include "ignoregaps.h" -#include "eachgapdistignorens.h" +#include "eachgapdist.h" //*************************************************************************************************************** void DeCalculator::setMask(string ms) { @@ -683,16 +683,16 @@ float DeCalculator::getCoef(vector obs, vector qav) { } //*************************************************************************************************************** //gets closest matches to each end, since chimeras will most likely have different parents on each end -vector DeCalculator::findClosest(Sequence* querySeq, vector db, int& numWanted, vector& indexes) { +vector DeCalculator::findClosest(Sequence* querySeq, vector& thisTemplate, vector& thisFilteredTemplate, int numWanted, int minSim) { try { - indexes.clear(); + //indexes.clear(); vector seqsMatches; vector distsLeft; vector distsRight; - Dist* distcalculator = new eachGapDistIgnoreNs(); + Dist* distcalculator = new eachGapDist(); string queryUnAligned = querySeq->getUnaligned(); int numBases = int(queryUnAligned.length() * 0.33); @@ -744,14 +744,14 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName(), rightQuery); //cout << querySeq->getName() << '\t' << leftSpot << '\t' << rightSpot << '\t' << firstBaseSpot << '\t' << lastBaseSpot << endl; //cout << queryUnAligned.length() << '\t' << queryLeft.getUnaligned().length() << '\t' << queryRight.getUnaligned().length() << endl; - for(int j = 0; j < db.size(); j++){ + for(int j = 0; j < thisFilteredTemplate.size(); j++){ - string dbAligned = db[j]->getAligned(); + string dbAligned = thisFilteredTemplate[j]->getAligned(); string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot)); //last 1/3 of the sequence - Sequence dbLeft(db[j]->getName(), leftDB); - Sequence dbRight(db[j]->getName(), rightDB); + Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB); + Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB); distcalculator->calcDist(queryLeft, dbLeft); float distLeft = distcalculator->getDist(); @@ -780,14 +780,6 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName() << '\t' << distsLeft[i].dist << endl; -// } -// for(int i=0;i<15;i++){ -// cout << "right\t" << db[distsLeft[i].index]->getName() << '\t' << distsRight[i].dist << endl; -// } - //merge results map seen; @@ -796,82 +788,62 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector dists; float lastRight = distsRight[0].dist; float lastLeft = distsLeft[0].dist; - int lasti = 0; - for (int i = 0; i < distsLeft.size(); i++) { + //int lasti = 0; + for (int i = 0; i < numWanted+1; i++) { + + if (m->control_pressed) { return seqsMatches; } + //add left if you havent already - it = seen.find(db[distsLeft[i].index]->getName()); + it = seen.find(thisTemplate[distsLeft[i].index]->getName()); if (it == seen.end()) { dists.push_back(distsLeft[i]); - seen[db[distsLeft[i].index]->getName()] = db[distsLeft[i].index]->getName(); + seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName(); lastLeft = distsLeft[i].dist; // cout << "loop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl; } //add right if you havent already - it = seen.find(db[distsRight[i].index]->getName()); + it = seen.find(thisTemplate[distsRight[i].index]->getName()); if (it == seen.end()) { dists.push_back(distsRight[i]); - seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName(); + seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName(); lastRight = distsRight[i].dist; // cout << "loop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl; } - if (dists.size() > numWanted) { lasti = i; break; } //you have enough results } -// cout << "lastLeft\t" << lastLeft << endl; - - //add in dups - lasti++; - int i = lasti; - while (i < distsLeft.size()) { - if (distsLeft[i].dist == lastLeft) { - it = seen.find(db[distsLeft[i].index]->getName()); - - if (it == seen.end()) { -// cout << "newLoop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl; - dists.push_back(distsLeft[i]); - seen[db[distsRight[i].index]->getName()] = db[distsLeft[i].index]->getName(); -// numWanted++; - } + //are we still above the minimum similarity cutoff + if ((lastLeft >= minSim) || (lastRight >= minSim)) { + //add in ties from left + int i = numWanted; + while (i < distsLeft.size()) { + if (distsLeft[i].dist == lastLeft) { dists.push_back(distsLeft[i]); } + else { break; } + i++; } - else { break; } - i++; - } - -// cout << "lastRight\t" << lastRight << endl; - - i = lasti; - while (i < distsRight.size()) { - if (distsRight[i].dist == lastRight) { - it = seen.find(db[distsRight[i].index]->getName()); - - if (it == seen.end()) { -// cout << "newLoop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl; - dists.push_back(distsRight[i]); - seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName(); -// numWanted++; - } + + //add in ties from right + i = numWanted; + while (i < distsRight.size()) { + if (distsRight[i].dist == lastRight) { dists.push_back(distsRight[i]); } + else { break; } + i++; } - else { break; } - i++; } - - numWanted = seen.size(); - if (numWanted > dists.size()) { - //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); - numWanted = dists.size(); - } -//cout << numWanted << endl; - for (int i = 0; i < numWanted; i++) { + + //cout << numWanted << endl; + for (int i = 0; i < dists.size(); i++) { // cout << db[dists[i].index]->getName() << '\t' << dists[i].dist << endl; - Sequence* temp = new Sequence(db[dists[i].index]->getName(), db[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. - - seqsMatches.push_back(temp); - indexes.push_back(dists[i].index); + if ((thisTemplate[dists[i].index]->getName() != querySeq->getName()) && (((1.0-dists[i].dist)*100) >= minSim)) { + Sequence* temp = new Sequence(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. + //cout << querySeq->getName() << '\t' << thisTemplate[dists[i].index]->getName() << '\t' << dists[i].dist << endl; + seqsMatches.push_back(temp); + } + } return seqsMatches; @@ -887,7 +859,7 @@ Sequence* DeCalculator::findClosest(Sequence* querySeq, vector db) { Sequence* seqsMatch; - Dist* distcalculator = new eachGapDistIgnoreNs(); + Dist* distcalculator = new eachGapDist(); int index = 0; int smallest = 1000000;