5 * Created by Pat Schloss on 12/29/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "onegapignore.h"
17 /**************************************************************************************************/
// Copy constructor.
// Copies data, templateSeqsLength and templateAligned from ddb and passes ddb on to the
// Database constructor. The distance calculator is not copied from ddb: this object
// allocates a fresh oneGapIgnoreTermGapDist of its own.
// NOTE(review): Database(ddb) is written last in the initializer list. If Database is the
// base class (presumably it is), C++ still constructs it before the members, so the written
// order does not reflect the real initialization order and compilers may warn about it.
// NOTE(review): distCalculator is an owning raw pointer obtained with new; confirm that the
// destructor releases it.
18 DistanceDB::DistanceDB(const DistanceDB& ddb) : data(ddb.data), templateSeqsLength(ddb.templateSeqsLength), templateAligned(ddb.templateAligned), Database(ddb) {
19 distCalculator = new oneGapIgnoreTermGapDist();
21 /**************************************************************************************************/
// Default constructor.
// Starts with an empty template state: the templates are assumed aligned until
// addSequence sees an unaligned one, and no template length has been recorded yet.
22 DistanceDB::DistanceDB() : Database() {
// Optimistic default; addSequence clears this flag when a sequence fails isAligned.
24 templateAligned = true;
// Zero means that no template length has been captured yet (see addSequence).
25 templateSeqsLength = 0;
// Calculator used by findClosestSequences to score the query against each template.
26 distCalculator = new oneGapIgnoreTermGapDist();
// Error report tagged with the class name and the function name; presumably this sits in
// an exception handler for the constructor body - TODO confirm.
29 m->errorOut(e, "DistanceDB", "DistanceDB");
33 /**************************************************************************************************/
// Registers one template sequence with the database.
// Visible effects: clears templateAligned and logs a message when the aligned string of
// seq fails isAligned, and records the aligned length of seq as templateSeqsLength when no
// length has been recorded so far.
// NOTE(review): seq is taken by value, so every call copies the Sequence; a const
// reference would avoid the copy if the callers allow it.
34 void DistanceDB::addSequence(Sequence seq) {
36 //are the template sequences aligned
37 if (!isAligned(seq.getAligned())) {
// One unaligned template is enough to mark the whole template set as unusable for the
// distance method; the flag is never set back to true in this function.
38 templateAligned = false;
39 m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
40 m->mothurOutEndLine();
// Only captured while the stored length is still 0, so later sequences of a different
// length do not change it and are not reported here.
43 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
// Error report tagged with the class name and this function name.
48 m->errorOut(e, "DistanceDB", "addSequence");
52 /**************************************************************************************************/
53 //returns indexes to top matches
// Finds the template sequences closest to query, scoring each template in data with
// distCalculator (smaller distance means closer).
//   query     - sequence to search with; its aligned string must have length
//               templateSeqsLength for a search to be attempted.
//   numWanted - number of matches requested; reduced to data.size() with a logged
//               message when it is larger than the number of templates.
// Side effects: sets searchScore to the best (smallest) distance found and appends the
// distance of every reported match to Scores.
// Two strategies are visible below: a full sort that reports the numWanted best matches,
// and a single pass that reports only the one best match.
// NOTE(review): the condition that chooses between them is not among the lines shown here;
// presumably it depends on numWanted - TODO confirm.
54 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
// Indexes into data, filled in by whichever strategy runs.
56 vector<int> topMatches;
58 bool templateSameLength = true;
59 string sequence = query->getAligned();
60 vector<seqDist> dists;
// NOTE(review): numWanted is a signed int compared against a container size; assuming data
// is a standard container the comparison is done in unsigned arithmetic.
64 if (numWanted > data.size()){
65 m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
66 m->mothurOutEndLine();
67 numWanted = data.size();
// Distances are only computed when the query has the recorded template length.
70 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
// Both conditions are required: a query of matching length and templates that all passed
// the alignment check in addSequence.
72 if (templateSameLength && templateAligned) {
// Strategy 1: one seqDist slot per template, then sort all of them.
75 dists.resize(data.size());
77 //calc distance from this sequence to every sequence in the template
78 for (int i = 0; i < data.size(); i++) {
79 distCalculator->calcDist(*query, data[i]);
80 float dist = distCalculator->getDist();
82 //save distance to each template sequence
// NOTE(review): the whole vector is sorted although only the first numWanted entries are
// read afterwards; std::partial_sort would be enough if this ever shows up in a profile.
88 sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest
90 //save distance of best match
// NOTE(review): dists[0] is out of range when data is empty; no guard for that case is
// visible in the lines shown here - TODO confirm that callers never search an empty database.
91 searchScore = dists[0].dist;
93 //fill topmatches with numwanted closest sequences indexes
94 for (int i = 0; i < numWanted; i++) {
// seq2 is read back as the template index of each entry.
95 topMatches.push_back(dists[i].seq2);
96 Scores.push_back(dists[i].dist);
// Strategy 2: a single pass that keeps only the smallest distance seen so far.
// 100000 is the starting value for that minimum; presumably it is larger than any distance
// the calculator can return - TODO confirm.
100 float smallDist = 100000;
101 for (int i = 0; i < data.size(); i++) {
102 distCalculator->calcDist(*query, data[i]);
103 float dist = distCalculator->getDist();
// Strict comparison: on equal distances the earlier candidate stays the minimum.
106 if (dist < smallDist) {
// Exactly one match is reported on this path; bestIndex is presumably the index recorded
// when the minimum was last lowered - TODO confirm.
111 searchScore = smallDist;
112 topMatches.push_back(bestIndex);
113 Scores.push_back(smallDist);
// Failure path: message logged when the search cannot be carried out.
117 m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
118 m->mothurOutEndLine();
// NOTE(review): the function label passed to errorOut below lacks the trailing s of the
// real function name, which can mislead anyone searching logs for it.
124 catch(exception& e) {
125 m->errorOut(e, "DistanceDB", "findClosestSequence");
129 /**************************************************************************************************/
// Heuristic alignment check: looks for the first period or dash character in seq.
// When neither character occurs, aligned is set to false; the branch taken when one is
// found is not among the lines shown here and presumably sets aligned to true - TODO confirm.
// NOTE(review): seq is taken by value, so each call copies the string.
130 bool DistanceDB::isAligned(string seq){
// NOTE(review): find_first_of returns an unsigned size type. Storing it in an int works
// for the not-found case on common platforms because npos narrows to -1 and converts back
// to npos in the comparison below, but a position beyond the int range would be truncated.
134 int pos = seq.find_first_of(".-");
136 if (pos != seq.npos) {
138 }else { aligned = false; }
143 catch(exception& e) {
// Error report tagged with the class name and this function name.
144 m->errorOut(e, "DistanceDB", "isAligned");
149 /**************************************************************************************************/