]> git.donarmstrong.com Git - mothur.git/blob - distancedb.cpp
*** empty log message ***
[mothur.git] / distancedb.cpp
1 /*
2  *  distancedb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/29/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "onegapignore.h"
15
16 /**************************************************************************************************/
17 DistanceDB::DistanceDB() { 
18         try {
19                 templateAligned = true;  
20                 templateSeqsLength = 0; 
21                 distCalculator = new oneGapIgnoreTermGapDist();
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "DistanceDB", "DistanceDB");
25                 exit(1);
26         }       
27 }
28 /**************************************************************************************************/
29 void DistanceDB::addSequence(Sequence seq) {
30         try {
31                 //are the template sequences aligned
32                 if (!isAligned(seq.getAligned())) {
33                         templateAligned = false;
34                         m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
35                         m->mothurOutEndLine(); 
36                 }
37                 
38                 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
39                                 
40                 data.push_back(seq);
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "DistanceDB", "addSequence");
44                 exit(1);
45         }       
46 }
47 /**************************************************************************************************/
48 //returns indexes to top matches
49 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
50         try {
51                 vector<int> topMatches;
52                 bool templateSameLength = true;
53                 string sequence = query->getAligned();
54                 vector<seqDist> dists; 
55                 
56                 searchScore = -1.0;
57         
58                 if (numWanted > data.size()){
59                         m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
60                         m->mothurOutEndLine();
61                         numWanted = data.size();
62                 }
63                 
64                 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
65                 
66                 if (templateSameLength && templateAligned) {
67                         if (numWanted != 1) {
68                                 
69                                 dists.resize(data.size());
70                                 
71                                 //calc distance from this sequence to every sequence in the template
72                                 for (int i = 0; i < data.size(); i++) {
73                                         distCalculator->calcDist(*query, data[i]);
74                                         float dist = distCalculator->getDist();
75                                         
76                                         //save distance to each template sequence
77                                         dists[i].seq1 = -1;
78                                         dists[i].seq2 = i;
79                                         dists[i].dist = dist;
80                                 }
81                                 
82                                 sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
83                                 
84                                 //save distance of best match
85                                 searchScore = dists[0].dist;
86                                 
87                                 //fill topmatches with numwanted closest sequences indexes
88                                 for (int i = 0; i < numWanted; i++) {
89                                         topMatches.push_back(dists[i].seq2);
90                                 }
91                         }else {
92                                 int bestIndex = 0;
93                                 float smallDist = 100000;
94                                 for (int i = 0; i < data.size(); i++) {
95                                         distCalculator->calcDist(*query, data[i]);
96                                         float dist = distCalculator->getDist();
97                                         
98                                         //are you smaller?
99                                         if (dist < smallDist) {
100                                                 bestIndex = i;
101                                                 smallDist = dist;
102                                         }
103                                 }
104                                 searchScore = smallDist;
105                                 topMatches.push_back(bestIndex);
106                         }
107                 
108                 }else{
109                         m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
110                         m->mothurOutEndLine();
111                         exit(1);
112                 }
113                 
114                 return topMatches;
115         }
116         catch(exception& e) {
117                 m->errorOut(e, "DistanceDB", "findClosestSequence");
118                 exit(1);
119         }       
120 }
121 /**************************************************************************************************/
122 bool DistanceDB::isAligned(string seq){
123         try {
124                 bool aligned;
125                 
126                 int pos = seq.find_first_of(".-");
127                 
128                 if (pos != seq.npos) {
129                         aligned = true;
130                 }else { aligned = false; }
131                 
132                 
133                 return aligned;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "DistanceDB", "isAligned");
137                 exit(1);
138         }       
139 }
140
141 /**************************************************************************************************/