]> git.donarmstrong.com Git - mothur.git/blob - clustersplitcommand.cpp
fixed cluster.split command
[mothur.git] / clustersplitcommand.cpp
1 /*
2  *  clustersplitcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clustersplitcommand.h"
11 #include "readcluster.h"
12 #include "splitmatrix.h"
13 #include "readphylip.h"
14 #include "readcolumn.h"
15 #include "readmatrix.hpp"
16 #include "inputdata.h"
17
18 //**********************************************************************************************************************
//Constructor: parses the option string, checks that the cluster.split parameters are valid and stores the settings. The clustering itself is done in execute().
20 ClusterSplitCommand::ClusterSplitCommand(string option)  {
21         try{
22                 globaldata = GlobalData::getInstance();
23                 abort = false;
24                 
25                 //allow user to run help
26                 if(option == "help") { help(); abort = true; }
27                 
28                 else {
29                         //valid paramters for this command
30                         string Array[] =  {"phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","showabund","timing","hard","processors","outputdir","inputdir"};
31                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
32                         
33                         OptionParser parser(option);
34                         map<string,string> parameters = parser.getParameters();
35                         
36                         ValidParameters validParameter;
37                 
38                         //check to make sure all parameters are valid for command
39                         map<string,string>::iterator it;
40                         for (it = parameters.begin(); it != parameters.end(); it++) { 
41                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
42                                         abort = true;
43                                 }
44                         }
45                         
46                         globaldata->newRead();
47                         
48                         //if the user changes the output directory command factory will send this info to us in the output parameter 
49                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
50                         
51                                 //if the user changes the input directory command factory will send this info to us in the output parameter 
52                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
53                         if (inputDir == "not found"){   inputDir = "";          }
54                         else {
55                                 string path;
56                                 it = parameters.find("phylip");
57                                 //user has given a template file
58                                 if(it != parameters.end()){ 
59                                         path = hasPath(it->second);
60                                         //if the user has not given a path then, add inputdir. else leave path alone.
61                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
62                                 }
63                                 
64                                 it = parameters.find("column");
65                                 //user has given a template file
66                                 if(it != parameters.end()){ 
67                                         path = hasPath(it->second);
68                                         //if the user has not given a path then, add inputdir. else leave path alone.
69                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
70                                 }
71                                 
72                                 it = parameters.find("name");
73                                 //user has given a template file
74                                 if(it != parameters.end()){ 
75                                         path = hasPath(it->second);
76                                         //if the user has not given a path then, add inputdir. else leave path alone.
77                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
78                                 }
79                                 
80                                 it = parameters.find("taxonomy");
81                                 //user has given a template file
82                                 if(it != parameters.end()){ 
83                                         path = hasPath(it->second);
84                                         //if the user has not given a path then, add inputdir. else leave path alone.
85                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
86                                 }
87                         }
88                         
89                         //check for required parameters
90                         phylipfile = validParameter.validFile(parameters, "phylip", true);
91                         if (phylipfile == "not open") { abort = true; }
92                         else if (phylipfile == "not found") { phylipfile = ""; }        
93                         else {  distfile = phylipfile;  format = "phylip";      }
94                         
95                         columnfile = validParameter.validFile(parameters, "column", true);
96                         if (columnfile == "not open") { abort = true; } 
97                         else if (columnfile == "not found") { columnfile = ""; }
98                         else {  distfile = columnfile; format = "column";       }
99                         
100                         namefile = validParameter.validFile(parameters, "name", true);
101                         if (namefile == "not open") { abort = true; }   
102                         else if (namefile == "not found") { namefile = ""; }
103                         
104                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
105                         if (taxFile == "not open") { abort = true; }    
106                         else if (taxFile == "not found") { taxFile = ""; }
107                         
108                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
109                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
110                 
111                         if (columnfile != "") {
112                                 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
113                         }
114                                         
115                         //check for optional parameter and set defaults
116                         // ...at some point should added some additional type checking...
117                         //get user cutoff and precision or use defaults
118                         string temp;
119                         temp = validParameter.validFile(parameters, "precision", false);
120                         if (temp == "not found") { temp = "100"; }
121                         //saves precision legnth for formatting below
122                         length = temp.length();
123                         convert(temp, precision); 
124                         
125                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
126                         hard = isTrue(temp);
127                         
128                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
129                         convert(temp, processors); 
130                         
131                         splitmethod = validParameter.validFile(parameters, "splitmethod", false);               if (splitmethod == "not found") { splitmethod = "distance"; }
132                         
133                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
134                         convert(temp, cutoff); 
135                         cutoff += (5 / (precision * 10.0));  
136                         
137                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
138                         convert(temp, taxLevelCutoff); 
139                         
140                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
141                         
142                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
143                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
144                         
145                         if ((splitmethod == "distance") || (splitmethod == "classify")) { }
146                         else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance or classify."); m->mothurOutEndLine(); abort = true; }
147                         
148                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
149
150                         showabund = validParameter.validFile(parameters, "showabund", false);
151                         if (showabund == "not found") { showabund = "T"; }
152
153                         timing = validParameter.validFile(parameters, "timing", false);
154                         if (timing == "not found") { timing = "F"; }
155                         
156                 }
157         }
158         catch(exception& e) {
159                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
160                 exit(1);
161         }
162 }
163
164 //**********************************************************************************************************************
165
166 void ClusterSplitCommand::help(){
167         try {
168                 m->mothurOut("The cluster.split command parameter options are phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, processors. Phylip or column and name are required.\n");
169                 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
170                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
171                 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
172                 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
173                 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
174                 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance or classify. \n");
175                 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
176                 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1. \n");
177                 m->mothurOut("The cluster.split command should be in the following format: \n");
178                 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
179                 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");       
180
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "ClusterSplitCommand", "help");
184                 exit(1);
185         }
186 }
187
188 //**********************************************************************************************************************
189
190 ClusterSplitCommand::~ClusterSplitCommand(){}
191
192 //**********************************************************************************************************************
193
194 int ClusterSplitCommand::execute(){
195         try {
196         
197                 if (abort == true) {    return 0;       }
198                 
199                 //****************** file prep work ******************************//
200                 
201                 //if user gave a phylip file convert to column file
202                 if (format == "phylip") {
203         
204                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
205                         
206                         NameAssignment* nameMap = NULL;
207                         convert->setFormat("phylip");
208                         convert->read(nameMap);
209                         
210                         if (m->control_pressed) {  delete convert;  return 0;  }
211                         
212                         distfile = convert->getOutputFile();
213                 
214                         //if no names file given with phylip file, create it
215                         ListVector* listToMakeNameFile =  convert->getListVector();
216                         if (namefile == "") {  //you need to make a namefile for split matrix
217                                 ofstream out;
218                                 namefile = phylipfile + ".names";
219                                 openOutputFile(namefile, out);
220                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
221                                         string bin = listToMakeNameFile->get(i);
222                                         out << bin << '\t' << bin << endl;
223                                 }
224                                 out.close();
225                         }
226                         delete listToMakeNameFile;
227                         delete convert;
228                 }
229                 if (m->control_pressed) { return 0; }
230                 
231                 time_t estart = time(NULL);
232                 
233                 //split matrix into non-overlapping groups
234                 SplitMatrix* split;
235                 if (splitmethod == "distance")  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod);                      }
236                 else                                                    {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod);  }
237                 
238                 split->split();
239                 
240                 if (m->control_pressed) { delete split; return 0; }
241                 
242                 string singletonName = split->getSingletonNames();
243                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
244                 delete split;
245                 
246                 if (m->control_pressed) { return 0; }
247                 
248                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
249                 estart = time(NULL);
250                 
251                 //****************** break up files between processes and cluster each file set ******************************//
252                 vector<string> listFileNames;
253                 set<string> labels;
254                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
255                                 if(processors == 1){
256                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
257                                 }else{
258                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
259                                         dividedNames.resize(processors);
260                                         
261                                         //for each file group figure out which process will complete it
262                                         //want to divide the load intelligently so the big files are spread between processes
263                                         int count = 1;
264                                         for (int i = 0; i < distName.size(); i++) { 
265                                                 int processToAssign = (i+1) % processors; 
266                                                 if (processToAssign == 0) { processToAssign = processors; }
267                                                 
268                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
269                                         }
270                                         
271                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
272                                         for (int i = 0; i < processors; i++) {
273                                                 int remainder = ((i+1) % processors);
274                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
275                                         }
276                                         
277                                         createProcesses(dividedNames);
278                                                         
279                                         if (m->control_pressed) { return 0; }
280
281                                         //get list of list file names from each process
282                                         for(int i=0;i<processors;i++){
283                                                 string filename = toString(processIDS[i]) + ".temp";
284                                                 ifstream in;
285                                                 openInputFile(filename, in);
286                                                 
287                                                 while(!in.eof()) {
288                                                         string tempName;
289                                                         in >> tempName; gobble(in);
290                                                         listFileNames.push_back(tempName);
291                                                 }
292                                                 in.close();
293                                                 remove((toString(processIDS[i]) + ".temp").c_str());
294                                                 
295                                                 //get labels
296                                                 filename = toString(processIDS[i]) + ".temp.labels";
297                                                 ifstream in2;
298                                                 openInputFile(filename, in2);
299                                                 
300                                                 while(!in2.eof()) {
301                                                         string tempName;
302                                                         in2 >> tempName; gobble(in);
303                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
304                                                 }
305                                                 in2.close();
306                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
307                                         }
308                                 }
309                 #else
310                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
311                 #endif
312                 
313                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
314                 
315                 //****************** merge list file and create rabund and sabund files ******************************//
316                 ListVector* listSingle;
317                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
318                 
319                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
320                 
321                 mergeLists(listFileNames, labelBins, listSingle);
322
323                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
324                 
325                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
326                 
327                 m->mothurOutEndLine();
328                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
329                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
330                 m->mothurOutEndLine();
331
332                 return 0;
333         }
334         catch(exception& e) {
335                 m->errorOut(e, "ClusterSplitCommand", "execute");
336                 exit(1);
337         }
338 }
339 //**********************************************************************************************************************
340 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string> userLabels, ListVector*& listSingle){
341         try {
342                                 
343                 map<float, int> labelBin;
344                 vector<float> orderFloat;
345                 int numSingleBins;
346                 
347                 //read in singletons
348                 if (singleton != "none") {
349                         ifstream in;
350                         openInputFile(singleton, in);
351                                 
352                         string firstCol, secondCol;
353                         listSingle = new ListVector();
354                         while (!in.eof()) {
355                                 in >> firstCol >> secondCol; gobble(in);
356                                 listSingle->push_back(secondCol);
357                         }
358                         in.close();
359                         remove(singleton.c_str());
360                         
361                         numSingleBins = listSingle->getNumBins();
362                 }else{  listSingle = NULL; numSingleBins = 0;  }
363                 
364                 //go through users set and make them floats so we can sort them 
365                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
366                         float temp = -10.0;
367
368                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
369                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
370                         
371                         orderFloat.push_back(temp);
372                         labelBin[temp] = numSingleBins; //initialize numbins 
373                 }
374         
375                 //sort order
376                 sort(orderFloat.begin(), orderFloat.end());
377                 userLabels.clear();
378                         
379                 //get the list info from each file
380                 for (int k = 0; k < listNames.size(); k++) {
381         
382                         if (m->control_pressed) {  
383                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
384                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
385                                 return labelBin;
386                         }
387                         
388                         InputData* input = new InputData(listNames[k], "list");
389                         ListVector* list = input->getListVector();
390                         string lastLabel = list->getLabel();
391                         
392                         string filledInList = listNames[k] + "filledInTemp";
393                         ofstream outFilled;
394                         openOutputFile(filledInList, outFilled);
395         
396                         //for each label needed
397                         for(int l = 0; l < orderFloat.size(); l++){
398                         
399                                 string thisLabel;
400                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
401                                 else { thisLabel = toString(orderFloat[l],  length-1);  } 
402
403                                 //this file has reached the end
404                                 if (list == NULL) { 
405                                         list = input->getListVector(lastLabel, true); 
406                                 }else{  //do you have the distance, or do you need to fill in
407                                                 
408                                         float labelFloat;
409                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
410                                         else { convert(list->getLabel(), labelFloat); }
411
412                                         //check for missing labels
413                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
414                                                 //if its bigger get last label, otherwise keep it
415                                                 delete list;
416                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
417                                         }
418                                         lastLabel = list->getLabel();
419                                 }
420                                 
421                                 //print to new file
422                                 list->setLabel(thisLabel);
423                                 list->print(outFilled);
424                 
425                                 //update labelBin
426                                 labelBin[orderFloat[l]] += list->getNumBins();
427                                                                         
428                                 delete list;
429                                                                         
430                                 list = input->getListVector();
431                         }
432                         
433                         if (list != NULL) { delete list; }
434                         delete input;
435                         
436                         outFilled.close();
437                         remove(listNames[k].c_str());
438                         rename(filledInList.c_str(), listNames[k].c_str());
439                 }
440                 
441                 return labelBin;
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
445                 exit(1);
446         }
447 }
448 //**********************************************************************************************************************
449 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
450         try {
451                 if (outputDir == "") { outputDir += hasPath(distfile); }
452                 fileroot = outputDir + getRootName(getSimpleName(distfile));
453                 
454                 openOutputFile(fileroot+ tag + ".sabund",       outSabund);
455                 openOutputFile(fileroot+ tag + ".rabund",       outRabund);
456                 openOutputFile(fileroot+ tag + ".list",         outList);
457                                 
458                 outputNames.push_back(fileroot+ tag + ".sabund");
459                 outputNames.push_back(fileroot+ tag + ".rabund");
460                 outputNames.push_back(fileroot+ tag + ".list");
461                 
462                 map<float, int>::iterator itLabel;
463
464                 //for each label needed
465                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
466                         
467                         string thisLabel;
468                         if (itLabel->first == -1) { thisLabel = "unique"; }
469                         else { thisLabel = toString(itLabel->first,  length-1);  } 
470                         
471                         outList << thisLabel << '\t' << itLabel->second << '\t';
472
473                         RAbundVector* rabund = new RAbundVector();
474                         rabund->setLabel(thisLabel);
475
476                         //add in singletons
477                         if (listSingle != NULL) {
478                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
479                                         outList << listSingle->get(j) << '\t';
480                                         rabund->push_back(getNumNames(listSingle->get(j)));
481                                 }
482                         }
483                         
484                         //get the list info from each file
485                         for (int k = 0; k < listNames.size(); k++) {
486         
487                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
488                                 
489                                 InputData* input = new InputData(listNames[k], "list");
490                                 ListVector* list = input->getListVector(thisLabel);
491                                 
492                                 //this file has reached the end
493                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
494                                 else {          
495                                         for (int j = 0; j < list->getNumBins(); j++) {
496                                                 outList << list->get(j) << '\t';
497                                                 rabund->push_back(getNumNames(list->get(j)));
498                                         }
499                                         delete list;
500                                 }
501                                 delete input;
502                         }
503                         
504                         SAbundVector sabund = rabund->getSAbundVector();
505                         
506                         sabund.print(outSabund);
507                         rabund->print(outRabund);
508                         outList << endl;
509                         
510                         delete rabund;
511                 }
512                 
513                 outList.close();
514                 outRabund.close();
515                 outSabund.close();
516                 
517                 if (listSingle != NULL) { delete listSingle;  }
518                 
519                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
520                 
521                 return 0;
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
525                 exit(1);
526         }
527 }
528
529 //**********************************************************************************************************************
530
531 void ClusterSplitCommand::printData(ListVector* oldList){
532         try {
533                 string label = oldList->getLabel();
534                 RAbundVector oldRAbund = oldList->getRAbundVector();
535                 
536                 oldRAbund.setLabel(label);
537                 if (isTrue(showabund)) {
538                         oldRAbund.getSAbundVector().print(cout);
539                 }
540                 oldRAbund.print(outRabund);
541                 oldRAbund.getSAbundVector().print(outSabund);
542         
543                 oldList->print(outList);
544         }
545         catch(exception& e) {
546                 m->errorOut(e, "ClusterSplitCommand", "printData");
547                 exit(1);
548         }
549 }
550 //**********************************************************************************************************************
551 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
552         try {
553         
554         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
555                 int process = 0;
556                 int exitCommand = 1;
557                 processIDS.clear();
558                 
559                 //loop through and create all the processes you want
560                 while (process != processors) {
561                         int pid = fork();
562                         
563                         if (pid > 0) {
564                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
565                                 process++;
566                         }else if (pid == 0){
567                                 set<string> labels;
568                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
569                                 
570                                 //write out names to file
571                                 string filename = toString(getpid()) + ".temp";
572                                 ofstream out;
573                                 openOutputFile(filename, out);
574                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
575                                 out.close();
576                                 
577                                 //print out labels
578                                 ofstream outLabels;
579                                 filename = toString(getpid()) + ".temp.labels";
580                                 openOutputFile(filename, outLabels);
581                 
582                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
583                                         outLabels << (*it) << endl;
584                                 }
585                                 outLabels.close();
586
587                                 exit(0);
588                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
589                 }
590                 
591                 //force parent to wait until all the processes are done
592                 for (int i=0;i<processors;i++) { 
593                         int temp = processIDS[i];
594                         wait(&temp);
595                 }
596                 
597                 return exitCommand;
598         #endif          
599         
600         }
601         catch(exception& e) {
602                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
603                 exit(1);
604         }
605 }
606 //**********************************************************************************************************************
607
608 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
609         try {
610                 Cluster* cluster;
611                 SparseMatrix* matrix;
612                 ListVector* list;
613                 ListVector oldList;
614                 RAbundVector* rabund;
615                 
616                 vector<string> listFileNames;
617                 
618                 //cluster each distance file
619                 for (int i = 0; i < distNames.size(); i++) {
620                         
621                         string thisNamefile = distNames[i].begin()->second;
622                         string thisDistFile = distNames[i].begin()->first;
623                         
624                         //read in distance file
625                         globaldata->setNameFile(thisNamefile);
626                         globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
627                         
628                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
629                         read->setCutoff(cutoff);
630
631                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
632                         nameMap->readMap();
633                         read->read(nameMap);
634                         
635                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
636                         
637                         list = read->getListVector();
638                         oldList = *list;
639                         matrix = read->getMatrix();
640                         
641                         delete read; 
642                         delete nameMap; 
643                         
644                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
645                 
646                         rabund = new RAbundVector(list->getRAbundVector());
647                         
648                         //create cluster
649                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
650                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
651                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
652                         tag = cluster->getTag();
653                 
654                         if (outputDir == "") { outputDir += hasPath(thisDistFile); }
655                         fileroot = outputDir + getRootName(getSimpleName(thisDistFile));
656                         
657                         ofstream listFile;
658                         openOutputFile(fileroot+ tag + ".list", listFile);
659                 
660                         listFileNames.push_back(fileroot+ tag + ".list");
661                 
662                         time_t estart = time(NULL);
663                         
664                         float previousDist = 0.00000;
665                         float rndPreviousDist = 0.00000;
666                         
667                         oldList = *list;
668
669                         print_start = true;
670                         start = time(NULL);
671                         double saveCutoff = cutoff;
672                 
673                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
674                 
675                                 if (m->control_pressed) { //clean up
676                                         delete matrix; delete list;     delete cluster; delete rabund;
677                                         listFile.close();
678                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
679                                         listFileNames.clear(); return listFileNames;
680                                 }
681                 
682                                 cluster->update(cutoff);
683         
684                                 float dist = matrix->getSmallDist();
685                                 float rndDist;
686                                 if (hard) {
687                                         rndDist = ceilDist(dist, precision); 
688                                 }else{
689                                         rndDist = roundDist(dist, precision); 
690                                 }
691
692                                 if(previousDist <= 0.0000 && dist != previousDist){
693                                         oldList.setLabel("unique");
694                                         oldList.print(listFile);
695                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
696                                 }
697                                 else if(rndDist != rndPreviousDist){
698                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
699                                         oldList.print(listFile);
700                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
701                                 }
702                 
703                                 previousDist = dist;
704                                 rndPreviousDist = rndDist;
705                                 oldList = *list;
706                         }
707
708                 
709                         if(previousDist <= 0.0000){
710                                 oldList.setLabel("unique");
711                                 oldList.print(listFile);
712                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
713                         }
714                         else if(rndPreviousDist<cutoff){
715                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
716                                 oldList.print(listFile);
717                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
718                         }
719         
720                         delete matrix; delete list;     delete cluster; delete rabund; 
721                         listFile.close();
722                         
723                         if (m->control_pressed) { //clean up
724                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
725                                 listFileNames.clear(); return listFileNames;
726                         }
727                         
728                         remove(thisDistFile.c_str());
729                         remove(thisNamefile.c_str());
730                 }
731                 
732                                         
733                 return listFileNames;
734         
735         }
736         catch(exception& e) {
737                 m->errorOut(e, "ClusterSplitCommand", "cluster");
738                 exit(1);
739         }
740
741
742 }
743
744 //**********************************************************************************************************************