]> git.donarmstrong.com Git - mothur.git/blob - clustercommand.cpp
fixes while testing
[mothur.git] / clustercommand.cpp
1 /*
2  *  clustercommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "clustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ClusterCommand::getValidParameters(){    
14         try {
15                 string AlignArray[] =  {"cutoff","precision","method","showabund","timing","hard","outputdir","inputdir"};
16                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ClusterCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 ClusterCommand::ClusterCommand(){       
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["list"] = tempOutNames;
31                 outputTypes["rabund"] = tempOutNames;
32                 outputTypes["sabund"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ClusterCommand", "ClusterCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> ClusterCommand::getRequiredParameters(){ 
41         try {
42                 vector<string> myArray; 
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "ClusterCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> ClusterCommand::getRequiredFiles(){      
52         try {
53                 string Array[] =  {"phylip","column","or"};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "ClusterCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
64 ClusterCommand::ClusterCommand(string option)  {
65         try{
66                 globaldata = GlobalData::getInstance();
67                 
68                 abort = false;
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"cutoff","precision","method","showabund","timing","hard","outputdir","inputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string,string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                 
83                         //check to make sure all parameters are valid for command
84                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
86                                         abort = true;
87                                 }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92                         outputTypes["list"] = tempOutNames;
93                         outputTypes["rabund"] = tempOutNames;
94                         outputTypes["sabund"] = tempOutNames;
95                 
96                         //if the user changes the output directory command factory will send this info to us in the output parameter 
97                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
98                         
99                         //error checking to make sure they read a distance file
100                         if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
101                                 m->mothurOut("Before you use the cluster command, you first need to read in a distance matrix."); m->mothurOutEndLine();
102                                 abort = true;
103                         } 
104                 
105                         //check for optional parameter and set defaults
106                         // ...at some point should added some additional type checking...
107                         //get user cutoff and precision or use defaults
108                         string temp;
109                         temp = validParameter.validFile(parameters, "precision", false);
110                         if (temp == "not found") { temp = "100"; }
111                         //saves precision legnth for formatting below
112                         length = temp.length();
113                         convert(temp, precision); 
114                         
115                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
116                         hard = m->isTrue(temp);
117                         
118                         temp = validParameter.validFile(parameters, "cutoff", false);
119                         if (temp == "not found") { temp = "10"; }
120                         convert(temp, cutoff); 
121                         cutoff += (5 / (precision * 10.0));  
122                         
123                         method = validParameter.validFile(parameters, "method", false);
124                         if (method == "not found") { method = "furthest"; }
125                         
126                         if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { }
127                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, and weighted."); m->mothurOutEndLine(); abort = true; }
128
129                         showabund = validParameter.validFile(parameters, "showabund", false);
130                         if (showabund == "not found") { showabund = "T"; }
131
132                         timing = validParameter.validFile(parameters, "timing", false);
133                         if (timing == "not found") { timing = "F"; }
134                         
135                         if (abort == false) {
136                         
137         
138                                                         //get matrix, list and rabund for execute
139                                 if(globaldata->gSparseMatrix != NULL)   {       matrix = globaldata->gSparseMatrix;             }
140                         
141                                 if(globaldata->gListVector != NULL){
142                                         list = globaldata->gListVector;
143                                         rabund = new RAbundVector(list->getRAbundVector());
144                                 }
145                                 
146                                 //create cluster
147                                 if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
148                                 else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
149                                 else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
150                                 else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method);    }
151                                 tag = cluster->getTag();
152                                 
153                                 if (outputDir == "") { outputDir += m->hasPath(globaldata->inputFileName); }
154                                 fileroot = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
155                         
156                                 m->openOutputFile(fileroot+ tag + ".sabund",    sabundFile);
157                                 m->openOutputFile(fileroot+ tag + ".rabund",    rabundFile);
158                                 m->openOutputFile(fileroot+ tag + ".list",              listFile);
159                                 
160                                 outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
161                                 outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
162                                 outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
163                         }
164                 }
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "ClusterCommand", "ClusterCommand");
168                 exit(1);
169         }
170 }
171
172 //**********************************************************************************************************************
173
174 void ClusterCommand::help(){
175         try {
176                 m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
177                 m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n");
178                 m->mothurOut("The cluster command should be in the following format: \n");
179                 m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
180                 m->mothurOut("The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");      
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "ClusterCommand", "help");
184                 exit(1);
185         }
186 }
187
188 //**********************************************************************************************************************
189
190 ClusterCommand::~ClusterCommand(){
191         if (abort == false) {
192                 delete cluster;
193                 delete rabund;
194         }
195 }
196
197 //**********************************************************************************************************************
198
199 int ClusterCommand::execute(){
200         try {
201         
202                 if (abort == true) {    return 0;       }
203                 
204                 time_t estart = time(NULL);
205                 //int ndist = matrix->getNNodes();
206                 float previousDist = 0.00000;
207                 float rndPreviousDist = 0.00000;
208                 oldRAbund = *rabund;
209                 oldList = *list;
210
211                 print_start = true;
212                 start = time(NULL);
213                 loops = 0;
214                 double saveCutoff = cutoff;
215                 
216                 while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
217                 
218                         if (m->control_pressed) { //clean up
219                                 delete globaldata->gSparseMatrix;  globaldata->gSparseMatrix = NULL;
220                                 delete globaldata->gListVector;  globaldata->gListVector = NULL;
221                                 if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); }
222                                 else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); }
223                                 sabundFile.close();rabundFile.close();listFile.close();
224                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         } outputTypes.clear();
225                                 return 0;
226                         }
227                 
228                         if (print_start && m->isTrue(timing)) {
229                                 m->mothurOut("Clustering (" + tag + ") dist " + toString(matrix->getSmallDist()) + "/" 
230                                         + toString(m->roundDist(matrix->getSmallDist(), precision)) 
231                                         + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
232                                 cout.flush();
233                                 print_start = false;
234                         }
235
236                         loops++;
237
238                         cluster->update(cutoff);
239         
240                         float dist = matrix->getSmallDist();
241                         float rndDist;
242                         if (hard) {
243                                 rndDist = m->ceilDist(dist, precision); 
244                         }else{
245                                 rndDist = m->roundDist(dist, precision); 
246                         }
247
248                         if(previousDist <= 0.0000 && dist != previousDist){
249                                 printData("unique");
250                         }
251                         else if(rndDist != rndPreviousDist){
252                                 printData(toString(rndPreviousDist,  length-1));
253                         }
254                 
255                         previousDist = dist;
256                         rndPreviousDist = rndDist;
257                         oldRAbund = *rabund;
258                         oldList = *list;
259                 }
260
261                 if (print_start && m->isTrue(timing)) {
262                         m->mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) 
263                                          + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
264                         cout.flush();
265                         print_start = false;
266                 }
267         
268                 if(previousDist <= 0.0000){
269                         printData("unique");
270                 }
271                 else if(rndPreviousDist<cutoff){
272                         printData(toString(rndPreviousDist, length-1));
273                 }
274                 
275                 //delete globaldata's copy of the sparsematrix and listvector to free up memory
276                 delete globaldata->gSparseMatrix;  globaldata->gSparseMatrix = NULL;
277                 delete globaldata->gListVector;  globaldata->gListVector = NULL;
278         
279                 //saves .list file so you can do the collect, rarefaction and summary commands without doing a read.list
280                 if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); }
281                 else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); }
282                 
283                 globaldata->setListFile(fileroot+ tag + ".list");
284                 globaldata->setNameFile("");
285                 globaldata->setFormat("list");
286                 
287                 sabundFile.close();
288                 rabundFile.close();
289                 listFile.close();
290         
291                 if (saveCutoff != cutoff) { 
292                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
293                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
294
295                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
296                 }
297                 
298                 m->mothurOutEndLine();
299                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
300                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
301                 m->mothurOutEndLine();
302
303                 
304                 //if (m->isTrue(timing)) {
305                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
306                 //}
307                 
308                 
309                 return 0;
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "ClusterCommand", "execute");
313                 exit(1);
314         }
315 }
316
317 //**********************************************************************************************************************
318
319 void ClusterCommand::printData(string label){
320         try {
321                 if (m->isTrue(timing)) {
322                         m->mothurOut("\tTime: " + toString(time(NULL) - start) + "\tsecs for " + toString(oldRAbund.getNumBins()) 
323                      + "\tclusters. Updates: " + toString(loops)); m->mothurOutEndLine();
324                 }
325                 print_start = true;
326                 loops = 0;
327                 start = time(NULL);
328
329                 oldRAbund.setLabel(label);
330                 if (m->isTrue(showabund)) {
331                         oldRAbund.getSAbundVector().print(cout);
332                 }
333                 oldRAbund.print(rabundFile);
334                 oldRAbund.getSAbundVector().print(sabundFile);
335         
336                 oldList.setLabel(label);
337                 oldList.print(listFile);
338         }
339         catch(exception& e) {
340                 m->errorOut(e, "ClusterCommand", "printData");
341                 exit(1);
342         }
343
344
345 }
346 //**********************************************************************************************************************