]> git.donarmstrong.com Git - mothur.git/blob - distancedb.cpp
added count.groups command and paralellized align.seqs for windows
[mothur.git] / distancedb.cpp
1 /*
2  *  distancedb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/29/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10
11 #include "database.hpp"
12 #include "sequence.hpp"
13 #include "distancedb.hpp"
14 #include "onegapignore.h"
15
16
17 /**************************************************************************************************/
18 DistanceDB::DistanceDB(const DistanceDB& ddb) : data(ddb.data), templateSeqsLength(ddb.templateSeqsLength), templateAligned(ddb.templateAligned), Database(ddb) { 
19         distCalculator = new oneGapIgnoreTermGapDist(); 
20 }
21 /**************************************************************************************************/
22 DistanceDB::DistanceDB() : Database() { 
23         try {
24                 templateAligned = true;  
25                 templateSeqsLength = 0; 
26                 distCalculator = new oneGapIgnoreTermGapDist();
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "DistanceDB", "DistanceDB");
30                 exit(1);
31         }       
32 }
33 /**************************************************************************************************/
34 void DistanceDB::addSequence(Sequence seq) {
35         try {
36                 //are the template sequences aligned
37                 if (!isAligned(seq.getAligned())) {
38                         templateAligned = false;
39                         m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
40                         m->mothurOutEndLine(); 
41                 }
42                 
43                 if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
44                                 
45                 data.push_back(seq);
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "DistanceDB", "addSequence");
49                 exit(1);
50         }       
51 }
52 /**************************************************************************************************/
53 //returns indexes to top matches
54 vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
55         try {
56                 vector<int> topMatches;
57                 Scores.clear();
58                 bool templateSameLength = true;
59                 string sequence = query->getAligned();
60                 vector<seqDist> dists; 
61                 
62                 searchScore = -1.0;
63         
64                 if (numWanted > data.size()){
65                         m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
66                         m->mothurOutEndLine();
67                         numWanted = data.size();
68                 }
69                 
70                 if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
71                 
72                 if (templateSameLength && templateAligned) {
73                         if (numWanted != 1) {
74                                 
75                                 dists.resize(data.size());
76                                 
77                                 //calc distance from this sequence to every sequence in the template
78                                 for (int i = 0; i < data.size(); i++) {
79                                         distCalculator->calcDist(*query, data[i]);
80                                         float dist = distCalculator->getDist();
81                                         
82                                         //save distance to each template sequence
83                                         dists[i].seq1 = -1;
84                                         dists[i].seq2 = i;
85                                         dists[i].dist = dist;
86                                 }
87                                 
88                                 sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
89                                 
90                                 //save distance of best match
91                                 searchScore = dists[0].dist;
92                                 
93                                 //fill topmatches with numwanted closest sequences indexes
94                                 for (int i = 0; i < numWanted; i++) {
95                                         topMatches.push_back(dists[i].seq2);
96                                         Scores.push_back(dists[i].dist);
97                                 }
98                         }else {
99                                 int bestIndex = 0;
100                                 float smallDist = 100000;
101                                 for (int i = 0; i < data.size(); i++) {
102                                         distCalculator->calcDist(*query, data[i]);
103                                         float dist = distCalculator->getDist();
104                                         
105                                         //are you smaller?
106                                         if (dist < smallDist) {
107                                                 bestIndex = i;
108                                                 smallDist = dist;
109                                         }
110                                 }
111                                 searchScore = smallDist;
112                                 topMatches.push_back(bestIndex);
113                                 Scores.push_back(smallDist);
114                         }
115                 
116                 }else{
117                         m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
118                         m->mothurOutEndLine();
119                         exit(1);
120                 }
121                 
122                 return topMatches;
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "DistanceDB", "findClosestSequence");
126                 exit(1);
127         }       
128 }
129 /**************************************************************************************************/
130 bool DistanceDB::isAligned(string seq){
131         try {
132                 bool aligned;
133                 
134                 int pos = seq.find_first_of(".-");
135                 
136                 if (pos != seq.npos) {
137                         aligned = true;
138                 }else { aligned = false; }
139                 
140                 
141                 return aligned;
142         }
143         catch(exception& e) {
144                 m->errorOut(e, "DistanceDB", "isAligned");
145                 exit(1);
146         }       
147 }
148
149 /**************************************************************************************************/