5 * Created by Pat Schloss on 12/29/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "eachgapdist.h"
16 /**************************************************************************************************/
17 DistanceDB::DistanceDB() {
19 templateAligned = true;
20 templateSeqsLength = 0;
21 distCalculator = new eachGapDist();
24 m->errorOut(e, "DistanceDB", "DistanceDB");
28 /**************************************************************************************************/
29 void DistanceDB::addSequence(Sequence seq) {
31 //are the template sequences aligned
32 if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); }
34 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
39 m->errorOut(e, "DistanceDB", "addSequence");
43 /**************************************************************************************************/
44 //returns indexes to top matches
45 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
47 vector<int> topMatches;
48 bool templateSameLength = true;
49 string sequence = query->getAligned();
50 vector<seqDist> dists;
54 if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); }
56 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
58 if (templateSameLength && templateAligned) {
61 dists.resize(data.size());
63 //calc distance from this sequence to every sequence in the template
64 for (int i = 0; i < data.size(); i++) {
65 distCalculator->calcDist(*query, data[i]);
66 float dist = distCalculator->getDist();
68 //save distance to each template sequence
74 sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest
76 //save distance of best match
77 searchScore = dists[0].dist;
79 //fill topmatches with numwanted closest sequences indexes
80 for (int i = 0; i < numWanted; i++) {
81 topMatches.push_back(dists[i].seq2);
85 float smallDist = 100000;
86 for (int i = 0; i < data.size(); i++) {
87 distCalculator->calcDist(*query, data[i]);
88 float dist = distCalculator->getDist();
91 if (dist < smallDist) {
97 searchScore = smallDist;
98 topMatches.push_back(bestIndex);
102 m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine();
108 catch(exception& e) {
109 m->errorOut(e, "DistanceDB", "findClosestSequence");
113 /**************************************************************************************************/
114 bool DistanceDB::isAligned(string seq){
118 int pos = seq.find_first_of(".-");
120 if (pos != seq.npos) {
122 }else { aligned = false; }
127 catch(exception& e) {
128 m->errorOut(e, "DistanceDB", "isAligned");
133 /**************************************************************************************************/