]> git.donarmstrong.com Git - mothur.git/blob - distancedb.cpp
c1bf7e7fb08dc3fae5b3cf17f575580269677148
[mothur.git] / distancedb.cpp
1 /*
2  *  distancedb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/29/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "onegapignore.h"
15
16 /**************************************************************************************************/
17 DistanceDB::DistanceDB() { 
18         try {
19                 templateAligned = true;  
20                 templateSeqsLength = 0; 
21                 distCalculator = new oneGapIgnoreTermGapDist();
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "DistanceDB", "DistanceDB");
25                 exit(1);
26         }       
27 }
28 /**************************************************************************************************/
29 void DistanceDB::addSequence(Sequence seq) {
30         try {
31                 //are the template sequences aligned
32                 if (!isAligned(seq.getAligned())) {
33                         templateAligned = false;
34                         m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
35                         m->mothurOutEndLine(); 
36                 }
37                 
38                 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
39                                 
40                 data.push_back(seq);
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "DistanceDB", "addSequence");
44                 exit(1);
45         }       
46 }
47 /**************************************************************************************************/
48 //returns indexes to top matches
49 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
50         try {
51                 vector<int> topMatches;
52                 Scores.clear();
53                 bool templateSameLength = true;
54                 string sequence = query->getAligned();
55                 vector<seqDist> dists; 
56                 
57                 searchScore = -1.0;
58         
59                 if (numWanted > data.size()){
60                         m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
61                         m->mothurOutEndLine();
62                         numWanted = data.size();
63                 }
64                 
65                 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
66                 
67                 if (templateSameLength && templateAligned) {
68                         if (numWanted != 1) {
69                                 
70                                 dists.resize(data.size());
71                                 
72                                 //calc distance from this sequence to every sequence in the template
73                                 for (int i = 0; i < data.size(); i++) {
74                                         distCalculator->calcDist(*query, data[i]);
75                                         float dist = distCalculator->getDist();
76                                         
77                                         //save distance to each template sequence
78                                         dists[i].seq1 = -1;
79                                         dists[i].seq2 = i;
80                                         dists[i].dist = dist;
81                                 }
82                                 
83                                 sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
84                                 
85                                 //save distance of best match
86                                 searchScore = dists[0].dist;
87                                 
88                                 //fill topmatches with numwanted closest sequences indexes
89                                 for (int i = 0; i < numWanted; i++) {
90                                         topMatches.push_back(dists[i].seq2);
91                                         Scores.push_back(dists[i].dist);
92                                 }
93                         }else {
94                                 int bestIndex = 0;
95                                 float smallDist = 100000;
96                                 for (int i = 0; i < data.size(); i++) {
97                                         distCalculator->calcDist(*query, data[i]);
98                                         float dist = distCalculator->getDist();
99                                         
100                                         //are you smaller?
101                                         if (dist < smallDist) {
102                                                 bestIndex = i;
103                                                 smallDist = dist;
104                                         }
105                                 }
106                                 searchScore = smallDist;
107                                 topMatches.push_back(bestIndex);
108                                 Scores.push_back(smallDist);
109                         }
110                 
111                 }else{
112                         m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
113                         m->mothurOutEndLine();
114                         exit(1);
115                 }
116                 
117                 return topMatches;
118         }
119         catch(exception& e) {
120                 m->errorOut(e, "DistanceDB", "findClosestSequence");
121                 exit(1);
122         }       
123 }
124 /**************************************************************************************************/
125 bool DistanceDB::isAligned(string seq){
126         try {
127                 bool aligned;
128                 
129                 int pos = seq.find_first_of(".-");
130                 
131                 if (pos != seq.npos) {
132                         aligned = true;
133                 }else { aligned = false; }
134                 
135                 
136                 return aligned;
137         }
138         catch(exception& e) {
139                 m->errorOut(e, "DistanceDB", "isAligned");
140                 exit(1);
141         }       
142 }
143
144 /**************************************************************************************************/