]> git.donarmstrong.com Git - mothur.git/blob - distancedb.cpp
changes while testing
[mothur.git] / distancedb.cpp
1 /*
2  *  distancedb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/29/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "onegapignore.h"
15
16
17 /**************************************************************************************************/
18 DistanceDB::DistanceDB() : Database() { 
19         try {
20                 templateAligned = true;  
21                 templateSeqsLength = 0; 
22                 distCalculator = new oneGapIgnoreTermGapDist();
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "DistanceDB", "DistanceDB");
26                 exit(1);
27         }       
28 }
29 /**************************************************************************************************/
30 void DistanceDB::addSequence(Sequence seq) {
31         try {
32                 //are the template sequences aligned
33                 if (!isAligned(seq.getAligned())) {
34                         templateAligned = false;
35                         m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
36                         m->mothurOutEndLine(); 
37                 }
38                 
39                 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
40                                 
41                 data.push_back(seq);
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "DistanceDB", "addSequence");
45                 exit(1);
46         }       
47 }
48 /**************************************************************************************************/
49 //returns indexes to top matches
50 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
51         try {
52                 vector<int> topMatches;
53                 Scores.clear();
54                 bool templateSameLength = true;
55                 string sequence = query->getAligned();
56                 vector<seqDist> dists; 
57                 
58                 searchScore = -1.0;
59         
60                 if (numWanted > data.size()){
61                         m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
62                         m->mothurOutEndLine();
63                         numWanted = data.size();
64                 }
65                 
66                 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
67                 
68                 if (templateSameLength && templateAligned) {
69                         if (numWanted != 1) {
70                                 
71                                 dists.resize(data.size());
72                                 
73                                 //calc distance from this sequence to every sequence in the template
74                                 for (int i = 0; i < data.size(); i++) {
75                                         distCalculator->calcDist(*query, data[i]);
76                                         float dist = distCalculator->getDist();
77                                         
78                                         //save distance to each template sequence
79                                         dists[i].seq1 = -1;
80                                         dists[i].seq2 = i;
81                                         dists[i].dist = dist;
82                                 }
83                                 
84                                 sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
85                                 
86                                 //save distance of best match
87                                 searchScore = dists[0].dist;
88                                 
89                                 //fill topmatches with numwanted closest sequences indexes
90                                 for (int i = 0; i < numWanted; i++) {
91                                         topMatches.push_back(dists[i].seq2);
92                                         Scores.push_back(dists[i].dist);
93                                 }
94                         }else {
95                                 int bestIndex = 0;
96                                 float smallDist = 100000;
97                                 for (int i = 0; i < data.size(); i++) {
98                                         distCalculator->calcDist(*query, data[i]);
99                                         float dist = distCalculator->getDist();
100                                         
101                                         //are you smaller?
102                                         if (dist < smallDist) {
103                                                 bestIndex = i;
104                                                 smallDist = dist;
105                                         }
106                                 }
107                                 searchScore = smallDist;
108                                 topMatches.push_back(bestIndex);
109                                 Scores.push_back(smallDist);
110                         }
111                 
112                 }else{
113                         m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
114                         m->mothurOutEndLine();
115                         exit(1);
116                 }
117                 
118                 return topMatches;
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "DistanceDB", "findClosestSequence");
122                 exit(1);
123         }       
124 }
125 /**************************************************************************************************/
126 bool DistanceDB::isAligned(string seq){
127         try {
128                 bool aligned;
129                 
130                 int pos = seq.find_first_of(".-");
131                 
132                 if (pos != seq.npos) {
133                         aligned = true;
134                 }else { aligned = false; }
135                 
136                 
137                 return aligned;
138         }
139         catch(exception& e) {
140                 m->errorOut(e, "DistanceDB", "isAligned");
141                 exit(1);
142         }       
143 }
144
145 /**************************************************************************************************/