X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=distancedb.cpp;h=ca6ffe8ba61217ac0b66b353e8f33461f4916e52;hb=37eac2026d91179acda0494c4dcca22f176551b9;hp=25c95c7a32a2523080bac92b41722ed1f1b91e6d;hpb=02909d6cae9963ba00dc746969a370fa8ca934fc;p=mothur.git diff --git a/distancedb.cpp b/distancedb.cpp index 25c95c7..ca6ffe8 100644 --- a/distancedb.cpp +++ b/distancedb.cpp @@ -11,52 +11,101 @@ #include "database.hpp" #include "sequence.hpp" #include "distancedb.hpp" - +#include "eachgapdist.h" /**************************************************************************************************/ - -DistanceDB::DistanceDB(string fastaFileName, string distanceFileName) : Database(fastaFileName) { - - ifstream inputData; - openInputFile(distanceFileName, inputData); - int numCandSeqs=count(istreambuf_iterator(inputData),istreambuf_iterator(), '\n'); // count the number of - inputData.seekg(0); // sequences - - hit closestMatch; - string candidateSeqName; - string junk; - - mostSimSequenceVector.resize(numCandSeqs); - - for(int i=0;i> candidateSeqName >> closestMatch.seqName >> closestMatch.indexNumber >> closestMatch.simScore; -// getline(inputData, junk); - mostSimSequenceVector[i] = closestMatch; +DistanceDB::DistanceDB() { + try { + templateAligned = true; + templateSeqsLength = 0; + distCalculator = new eachGapDist(); } - cout << numCandSeqs << endl; - searchIndex = 0; - inputData.close(); + catch(exception& e) { + m->errorOut(e, "DistanceDB", "DistanceDB"); + exit(1); + } } - /**************************************************************************************************/ - -Sequence DistanceDB::findClosestSequence(Sequence* candidateSeq){ - - hit simAccession = mostSimSequenceVector[searchIndex]; -// string candidateSeqName, closestMatchSeqName, junk; -// int closestMatchIndexNumber; -// float closestMatchSimScore; -// -// inputData >> candidateSeqName >> closestMatchSeqName >> closestMatchIndexNumber >> closestMatchSimScore; -// getline(inputData, junk); - -// searchScore = 100. * closestMatchSimScore; - - searchScore = 100. * simAccession.simScore; - searchIndex++; -// return templateSequences[closestMatchIndexNumber]; - return templateSequences[simAccession.indexNumber]; +void DistanceDB::addSequence(Sequence seq) { + try { + //are the template sequences aligned + if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); } + + if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); } + + data.push_back(seq); + } + catch(exception& e) { + m->errorOut(e, "DistanceDB", "addSequence"); + exit(1); + } +} +/**************************************************************************************************/ +//returns indexes to top matches +vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ + try { + vector topMatches; + bool templateSameLength = true; + string sequence = query->getAligned(); + vector dists; + + searchScore = -1.0; + if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); } + + if (sequence.length() != templateSeqsLength) { templateSameLength = false; } + + if (templateSameLength && templateAligned) { + //calc distance from this sequence to every sequence in the template + for (int i = 0; i < data.size(); i++) { + distCalculator->calcDist(*query, data[i]); + float dist = distCalculator->getDist(); + + //save distance to each template sequence + seqDist temp(-1, i, dist); + dists.push_back(temp); + } + + sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest + + //save distance of best match + searchScore = dists[0].dist; + + //fill topmatches with numwanted closest sequences indexes + for (int i = 0; i < numWanted; i++) { + topMatches.push_back(dists[i].seq2); + } + + }else{ + m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine(); + exit(1); + } + + return topMatches; + } + catch(exception& e) { + m->errorOut(e, "DistanceDB", "findClosestSequence"); + exit(1); + } +} +/**************************************************************************************************/ +bool DistanceDB::isAligned(string seq){ + try { + bool aligned; + + int pos = seq.find_first_of(".-"); + + if (pos != seq.npos) { + aligned = true; + }else { aligned = false; } + + + return aligned; + } + catch(exception& e) { + m->errorOut(e, "DistanceDB", "isAligned"); + exit(1); + } } /**************************************************************************************************/