]> git.donarmstrong.com Git - mothur.git/blob - distancedb.cpp
fixed trim.seqs bug with qtrim parameter and added num=1 special case to database...
[mothur.git] / distancedb.cpp
1 /*
2  *  distancedb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/29/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "eachgapdist.h"
15
16 /**************************************************************************************************/
17 DistanceDB::DistanceDB() { 
18         try {
19                 templateAligned = true;  
20                 templateSeqsLength = 0; 
21                 distCalculator = new eachGapDist();
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "DistanceDB", "DistanceDB");
25                 exit(1);
26         }       
27 }
28 /**************************************************************************************************/
29 void DistanceDB::addSequence(Sequence seq) {
30         try {
31                 //are the template sequences aligned
32                 if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); }
33                 
34                 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
35                                 
36                 data.push_back(seq);
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "DistanceDB", "addSequence");
40                 exit(1);
41         }       
42 }
43 /**************************************************************************************************/
44 //returns indexes to top matches
45 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
46         try {
47                 vector<int> topMatches;
48                 bool templateSameLength = true;
49                 string sequence = query->getAligned();
50                 vector<seqDist> dists; 
51                 
52                 searchScore = -1.0;
53         
54                 if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); }
55                 
56                 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
57                 
58                 if (templateSameLength && templateAligned) {
59                         if (numWanted != 1) {
60                                 
61                                 dists.resize(data.size());
62                                 
63                                 //calc distance from this sequence to every sequence in the template
64                                 for (int i = 0; i < data.size(); i++) {
65                                         distCalculator->calcDist(*query, data[i]);
66                                         float dist = distCalculator->getDist();
67                                         
68                                         //save distance to each template sequence
69                                         dists[i].seq1 = -1;
70                                         dists[i].seq2 = i;
71                                         dists[i].dist = dist;
72                                 }
73                                 
74                                 sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
75                                 
76                                 //save distance of best match
77                                 searchScore = dists[0].dist;
78                                 
79                                 //fill topmatches with numwanted closest sequences indexes
80                                 for (int i = 0; i < numWanted; i++) {
81                                         topMatches.push_back(dists[i].seq2);
82                                 }
83                         }else {
84                                 int bestIndex = 0;
85                                 float smallDist = 100000;
86                                 for (int i = 0; i < data.size(); i++) {
87                                         distCalculator->calcDist(*query, data[i]);
88                                         float dist = distCalculator->getDist();
89                                         
90                                         //are you smaller?
91                                         if (dist < smallDist) {
92                                                 bestIndex = i;
93                                                 smallDist = dist;
94                                         }
95                                 }
96                                 
97                                 searchScore = smallDist;
98                                 topMatches.push_back(bestIndex);
99                         }
100                 
101                 }else{
102                         m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine();
103                         exit(1);
104                 }
105                 
106                 return topMatches;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "DistanceDB", "findClosestSequence");
110                 exit(1);
111         }       
112 }
113 /**************************************************************************************************/
114 bool DistanceDB::isAligned(string seq){
115         try {
116                 bool aligned;
117                 
118                 int pos = seq.find_first_of(".-");
119                 
120                 if (pos != seq.npos) {
121                         aligned = true;
122                 }else { aligned = false; }
123                 
124                 
125                 return aligned;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "DistanceDB", "isAligned");
129                 exit(1);
130         }       
131 }
132
133 /**************************************************************************************************/