5 * Created by Pat Schloss on 12/29/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "onegapignore.h"
16 /**************************************************************************************************/
17 DistanceDB::DistanceDB() {
19 templateAligned = true;
20 templateSeqsLength = 0;
21 distCalculator = new oneGapIgnoreTermGapDist();
24 m->errorOut(e, "DistanceDB", "DistanceDB");
28 /**************************************************************************************************/
29 void DistanceDB::addSequence(Sequence seq) {
31 //are the template sequences aligned
32 if (!isAligned(seq.getAligned())) {
33 templateAligned = false;
34 m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
35 m->mothurOutEndLine();
38 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
43 m->errorOut(e, "DistanceDB", "addSequence");
47 /**************************************************************************************************/
48 //returns indexes to top matches
49 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
51 vector<int> topMatches;
53 bool templateSameLength = true;
54 string sequence = query->getAligned();
55 vector<seqDist> dists;
59 if (numWanted > data.size()){
60 m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
61 m->mothurOutEndLine();
62 numWanted = data.size();
65 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
67 if (templateSameLength && templateAligned) {
70 dists.resize(data.size());
72 //calc distance from this sequence to every sequence in the template
73 for (int i = 0; i < data.size(); i++) {
74 distCalculator->calcDist(*query, data[i]);
75 float dist = distCalculator->getDist();
77 //save distance to each template sequence
83 sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest
85 //save distance of best match
86 searchScore = dists[0].dist;
88 //fill topmatches with numwanted closest sequences indexes
89 for (int i = 0; i < numWanted; i++) {
90 topMatches.push_back(dists[i].seq2);
91 Scores.push_back(dists[i].dist);
95 float smallDist = 100000;
96 for (int i = 0; i < data.size(); i++) {
97 distCalculator->calcDist(*query, data[i]);
98 float dist = distCalculator->getDist();
101 if (dist < smallDist) {
106 searchScore = smallDist;
107 topMatches.push_back(bestIndex);
108 Scores.push_back(smallDist);
112 m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
113 m->mothurOutEndLine();
119 catch(exception& e) {
120 m->errorOut(e, "DistanceDB", "findClosestSequence");
124 /**************************************************************************************************/
125 bool DistanceDB::isAligned(string seq){
129 int pos = seq.find_first_of(".-");
131 if (pos != seq.npos) {
133 }else { aligned = false; }
138 catch(exception& e) {
139 m->errorOut(e, "DistanceDB", "isAligned");
144 /**************************************************************************************************/