X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=decalc.cpp;h=98545f8e24a72dca332bf7571fc0e5e9e6936988;hp=3b3740bcea6682e14b21300a48e07fdb9f7079a3;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=1cf188b912d6da8f2cd03dd71cecef664a699c1a diff --git a/decalc.cpp b/decalc.cpp index 3b3740b..98545f8 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -599,7 +599,7 @@ cout << largest->second << '\t' << largest->first->score << '\t' << largest->fir } } -//*************************************************************************************************************** +*************************************************************************************************************** //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used... int DeCalculator::findLargestContrib(vector seen) { try{ @@ -624,7 +624,7 @@ int DeCalculator::findLargestContrib(vector seen) { exit(1); } } -//*************************************************************************************************************** +*************************************************************************************************************** void DeCalculator::removeContrib(int bad, vector& quan) { try{ @@ -683,23 +683,23 @@ float DeCalculator::getCoef(vector obs, vector qav) { } //*************************************************************************************************************** //gets closest matches to each end, since chimeras will most likely have different parents on each end -vector DeCalculator::findClosest(Sequence* querySeq, vector& thisTemplate, vector& thisFilteredTemplate, int numWanted, int minSim) { +vector DeCalculator::findClosest(Sequence querySeq, vector& thisTemplate, vector& thisFilteredTemplate, int numWanted, int minSim) { try { //indexes.clear(); - vector seqsMatches; + vector seqsMatches; vector distsLeft; vector distsRight; Dist* distcalculator = new eachGapDist(); - string queryUnAligned = querySeq->getUnaligned(); + string queryUnAligned = querySeq.getUnaligned(); int numBases = int(queryUnAligned.length() * 0.33); string leftQuery = ""; //first 1/3 of the sequence string rightQuery = ""; //last 1/3 of the sequence - string queryAligned = querySeq->getAligned(); + string queryAligned = querySeq.getAligned(); //left side bool foundFirstBase = false; @@ -726,7 +726,7 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector= numBases) { rightSpot = i; break; } //last 1/3 + if (baseCount > numBases + 1) { rightSpot = i; break; } //last 1/3 } //trim end @@ -738,18 +738,19 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName(), leftQuery); - Sequence queryRight(querySeq->getName(), rightQuery); //cout << querySeq->getName() << '\t' << leftSpot << '\t' << rightSpot << '\t' << firstBaseSpot << '\t' << lastBaseSpot << endl; //cout << queryUnAligned.length() << '\t' << queryLeft.getUnaligned().length() << '\t' << queryRight.getUnaligned().length() << endl; for(int j = 0; j < thisFilteredTemplate.size(); j++){ string dbAligned = thisFilteredTemplate[j]->getAligned(); string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence - string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot)); //last 1/3 of the sequence - + string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //last 1/3 of the sequence + Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB); Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB); @@ -758,7 +759,7 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorcalcDist(queryRight, dbRight); float distRight = distcalculator->getDist(); - + SeqDist subjectLeft; subjectLeft.seq = NULL; subjectLeft.dist = distLeft; @@ -780,6 +781,7 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector seen; @@ -788,14 +790,15 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector dists; float lastRight = distsRight[0].dist; float lastLeft = distsLeft[0].dist; - //int lasti = 0; + + float maxDist = 1.0 - (minSim / 100.0); + for (int i = 0; i < numWanted+1; i++) { - if (m->control_pressed) { return seqsMatches; } //add left if you havent already it = seen.find(thisTemplate[distsLeft[i].index]->getName()); - if (it == seen.end()) { + if (it == seen.end() && distsLeft[i].dist <= maxDist) { dists.push_back(distsLeft[i]); seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName(); lastLeft = distsLeft[i].dist; @@ -804,13 +807,15 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName()); - if (it == seen.end()) { + if (it == seen.end() && distsRight[i].dist <= maxDist) { dists.push_back(distsRight[i]); seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName(); lastRight = distsRight[i].dist; // cout << "loop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl; } + if (i == numWanted) { break; } + } //are we still above the minimum similarity cutoff @@ -832,15 +837,13 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName() << '\t' << dists[i].dist << endl; - if ((thisTemplate[dists[i].index]->getName() != querySeq->getName()) && (dists[i].dist >= minSim)) { - Sequence* temp = new Sequence(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. - + if ((thisTemplate[dists[i].index]->getName() != querySeq.getName()) && (((1.0-dists[i].dist)*100) >= minSim)) { + Sequence temp(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. + //cout << querySeq->getName() << '\t' << thisTemplate[dists[i].index]->getName() << '\t' << dists[i].dist << endl; seqsMatches.push_back(temp); } @@ -886,17 +889,17 @@ Sequence* DeCalculator::findClosest(Sequence* querySeq, vector db) { } } /***************************************************************************************************************/ -map DeCalculator::trimSeqs(Sequence* query, vector topMatches) { +map DeCalculator::trimSeqs(Sequence& query, vector& topMatches) { try { int frontPos = 0; //should contain first position in all seqs that is not a gap character - int rearPos = query->getAligned().length(); + int rearPos = query.getAligned().length(); //********find first position in topMatches that is a non gap character***********// //find first position all query seqs that is a non gap character for (int i = 0; i < topMatches.size(); i++) { - string aligned = topMatches[i]->getAligned(); + string aligned = topMatches[i].getAligned(); int pos = 0; //find first spot in this seq @@ -912,7 +915,7 @@ map DeCalculator::trimSeqs(Sequence* query, vector topMatch } - string aligned = query->getAligned(); + string aligned = query.getAligned(); int pos = 0; //find first position in query that is a non gap character @@ -930,7 +933,7 @@ map DeCalculator::trimSeqs(Sequence* query, vector topMatch //********find last position in topMatches that is a non gap character***********// for (int i = 0; i < topMatches.size(); i++) { - string aligned = topMatches[i]->getAligned(); + string aligned = topMatches[i].getAligned(); int pos = aligned.length(); //find first spot in this seq @@ -946,7 +949,7 @@ map DeCalculator::trimSeqs(Sequence* query, vector topMatch } - aligned = query->getAligned(); + aligned = query.getAligned(); pos = aligned.length(); //find last position in query that is a non gap character @@ -963,24 +966,24 @@ map DeCalculator::trimSeqs(Sequence* query, vector topMatch map trimmedPos; //check to make sure that is not whole seq if ((rearPos - frontPos - 1) <= 0) { - query->setAligned(""); + query.setAligned(""); //trim topMatches for (int i = 0; i < topMatches.size(); i++) { - topMatches[i]->setAligned(""); + topMatches[i].setAligned(""); } }else { //trim query - string newAligned = query->getAligned(); + string newAligned = query.getAligned(); newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); - query->setAligned(newAligned); + query.setAligned(newAligned); //trim topMatches for (int i = 0; i < topMatches.size(); i++) { - newAligned = topMatches[i]->getAligned(); + newAligned = topMatches[i].getAligned(); newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); - topMatches[i]->setAligned(newAligned); + topMatches[i].setAligned(newAligned); } for (int i = 0; i < newAligned.length(); i++) {