X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=distancedb.cpp;h=27e278574493c89d0197d99cbe71c0c3dd03d0d9;hp=5c49a936e9293c629e05ea5565f334a43e5cc37c;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=6de5adaae66b28aa60a75f123005cede410c156c diff --git a/distancedb.cpp b/distancedb.cpp index 5c49a93..27e2785 100644 --- a/distancedb.cpp +++ b/distancedb.cpp @@ -11,14 +11,15 @@ #include "database.hpp" #include "sequence.hpp" #include "distancedb.hpp" -#include "eachgapdist.h" +#include "onegapignore.h" + /**************************************************************************************************/ -DistanceDB::DistanceDB() { +DistanceDB::DistanceDB() : Database() { try { templateAligned = true; templateSeqsLength = 0; - distCalculator = new eachGapDist(); + distCalculator = new oneGapIgnoreTermGapDist(); } catch(exception& e) { m->errorOut(e, "DistanceDB", "DistanceDB"); @@ -29,7 +30,11 @@ DistanceDB::DistanceDB() { void DistanceDB::addSequence(Sequence seq) { try { //are the template sequences aligned - if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); } + if (!isAligned(seq.getAligned())) { + templateAligned = false; + m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); + m->mothurOutEndLine(); + } if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); } @@ -45,34 +50,68 @@ void DistanceDB::addSequence(Sequence seq) { vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ try { vector topMatches; + Scores.clear(); bool templateSameLength = true; string sequence = query->getAligned(); - vector dists; + vector dists; + + searchScore = -1.0; - if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); } + if (numWanted > data.size()){ + m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); + m->mothurOutEndLine(); + numWanted = data.size(); + } if (sequence.length() != templateSeqsLength) { templateSameLength = false; } if (templateSameLength && templateAligned) { - //calc distance from this sequence to every sequence in the template - for (int i = 0; i < data.size(); i++) { - distCalculator->calcDist(*query, data[i]); - float dist = distCalculator->getDist(); + if (numWanted != 1) { - //save distance to each template sequence - seqDist temp(-1, i, dist); - dists.push_back(temp); - } - - sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest - - //fill topmatches with numwanted closest sequences indexes - for (int i = 0; i < numWanted; i++) { - topMatches.push_back(dists[i].seq2); + dists.resize(data.size()); + + //calc distance from this sequence to every sequence in the template + for (int i = 0; i < data.size(); i++) { + distCalculator->calcDist(*query, data[i]); + float dist = distCalculator->getDist(); + + //save distance to each template sequence + dists[i].seq1 = -1; + dists[i].seq2 = i; + dists[i].dist = dist; + } + + sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest + + //save distance of best match + searchScore = dists[0].dist; + + //fill topmatches with numwanted closest sequences indexes + for (int i = 0; i < numWanted; i++) { + topMatches.push_back(dists[i].seq2); + Scores.push_back(dists[i].dist); + } + }else { + int bestIndex = 0; + float smallDist = 100000; + for (int i = 0; i < data.size(); i++) { + distCalculator->calcDist(*query, data[i]); + float dist = distCalculator->getDist(); + + //are you smaller? + if (dist < smallDist) { + bestIndex = i; + smallDist = dist; + } + } + searchScore = smallDist; + topMatches.push_back(bestIndex); + Scores.push_back(smallDist); } }else{ - m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine(); + m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); + m->mothurOutEndLine(); exit(1); }