]> git.donarmstrong.com Git - mothur.git/blob - clustersplitcommand.cpp
1.18.0
[mothur.git] / clustersplitcommand.cpp
1 /*
2  *  clustersplitcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clustersplitcommand.h"
11 #include "readcluster.h"
12 #include "splitmatrix.h"
13 #include "readphylip.h"
14 #include "readcolumn.h"
15 #include "readmatrix.hpp"
16 #include "inputdata.h"
17
18
19 //**********************************************************************************************************************
20 vector<string> ClusterSplitCommand::setParameters(){    
21         try {
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
33                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
39                         
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ClusterSplitCommand::getHelpString(){    
51         try {
52                 string helpString = "";
53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
70 #ifdef USE_MPI
71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
72 #endif
73                 helpString += "The cluster.split command should be in the following format: \n";
74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";       
76                 return helpString;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 ClusterSplitCommand::ClusterSplitCommand(){     
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["list"] = tempOutNames;
90                 outputTypes["rabund"] = tempOutNames;
91                 outputTypes["sabund"] = tempOutNames;
92                 outputTypes["column"] = tempOutNames;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
96                 exit(1);
97         }
98 }
99 //**********************************************************************************************************************
100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
102         try{
103                 abort = false; calledHelp = false;   
104                 format = "";
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 
109                 else {
110                         vector<string> myArray = setParameters();
111                         
112                         OptionParser parser(option);
113                         map<string,string> parameters = parser.getParameters();
114                         
115                         ValidParameters validParameter("cluster.split");
116                 
117                         //check to make sure all parameters are valid for command
118                         map<string,string>::iterator it;
119                         for (it = parameters.begin(); it != parameters.end(); it++) { 
120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
121                                         abort = true;
122                                 }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["list"] = tempOutNames;
128                         outputTypes["rabund"] = tempOutNames;
129                         outputTypes["sabund"] = tempOutNames;
130                         outputTypes["column"] = tempOutNames;
131                         
132                         //if the user changes the output directory command factory will send this info to us in the output parameter 
133                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
134                         
135                                 //if the user changes the input directory command factory will send this info to us in the output parameter 
136                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
137                         if (inputDir == "not found"){   inputDir = "";          }
138                         else {
139                                 string path;
140                                 it = parameters.find("phylip");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
146                                 }
147                                 
148                                 it = parameters.find("column");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
154                                 }
155                                 
156                                 it = parameters.find("name");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
162                                 }
163                                 
164                                 it = parameters.find("taxonomy");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
170                                 }
171                                 
172                                 it = parameters.find("fasta");
173                                 //user has given a template file
174                                 if(it != parameters.end()){ 
175                                         path = m->hasPath(it->second);
176                                         //if the user has not given a path then, add inputdir. else leave path alone.
177                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
178                                 }
179                         }
180                         
181                         //check for required parameters
182                         phylipfile = validParameter.validFile(parameters, "phylip", true);
183                         if (phylipfile == "not open") { abort = true; }
184                         else if (phylipfile == "not found") { phylipfile = ""; }        
185                         else {  distfile = phylipfile;  format = "phylip";      }
186                         
187                         columnfile = validParameter.validFile(parameters, "column", true);
188                         if (columnfile == "not open") { abort = true; } 
189                         else if (columnfile == "not found") { columnfile = ""; }
190                         else {  distfile = columnfile; format = "column";       }
191                         
192                         namefile = validParameter.validFile(parameters, "name", true);
193                         if (namefile == "not open") { abort = true; }   
194                         else if (namefile == "not found") { namefile = ""; }
195                         
196                         fastafile = validParameter.validFile(parameters, "fasta", true);
197                         if (fastafile == "not open") { abort = true; }  
198                         else if (fastafile == "not found") { fastafile = ""; }
199                         else { distfile = fastafile;  splitmethod = "fasta";  }
200                         
201                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
202                         if (taxFile == "not open") { abort = true; }    
203                         else if (taxFile == "not found") { taxFile = ""; }
204                         
205                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { 
206                                 //is there are current file available for either of these?
207                                 //give priority to column, then phylip, then fasta
208                                 columnfile = m->getColumnFile(); 
209                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
210                                 else { 
211                                         phylipfile = m->getPhylipFile(); 
212                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
213                                         else { 
214                                                 fastafile = m->getFastaFile(); 
215                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
216                                                 else { 
217                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); 
218                                                         abort = true; 
219                                                 }
220                                         }
221                                 }
222                         }
223                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
224                 
225                         if (columnfile != "") {
226                                 if (namefile == "") { 
227                                         namefile = m->getNameFile(); 
228                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
229                                         else { 
230                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
231                                                 abort = true; 
232                                         }       
233                                 }
234                         }
235                         
236                         if (fastafile != "") {
237                                 if (taxFile == "") { 
238                                         taxFile = m->getTaxonomyFile(); 
239                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
240                                         else { 
241                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
242                                                 abort = true; 
243                                         }       
244                                 }
245                                 
246                                 if (namefile == "") { 
247                                         namefile = m->getNameFile(); 
248                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
249                                         else { 
250                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
251                                                 abort = true; 
252                                         }       
253                                 }
254                         }
255                                         
256                         //check for optional parameter and set defaults
257                         // ...at some point should added some additional type checking...
258                         //get user cutoff and precision or use defaults
259                         string temp;
260                         temp = validParameter.validFile(parameters, "precision", false);
261                         if (temp == "not found") { temp = "100"; }
262                         //saves precision legnth for formatting below
263                         length = temp.length();
264                         convert(temp, precision); 
265                         
266                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
267                         hard = m->isTrue(temp);
268                         
269                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
270                         large = m->isTrue(temp);
271                         
272                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
273                         m->setProcessors(temp);
274                         convert(temp, processors);
275                         
276                         temp = validParameter.validFile(parameters, "splitmethod", false);      
277                         if (splitmethod != "fasta") {
278                                 if (temp == "not found")  { splitmethod = "distance"; }
279                                 else {  splitmethod = temp; }
280                         }
281                         
282                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
283                         convert(temp, cutoff); 
284                         cutoff += (5 / (precision * 10.0));  
285                         
286                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
287                         convert(temp, taxLevelCutoff); 
288                         
289                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
290                         
291                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
292                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
293                         
294                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
295                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
296                         
297                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
298
299                         showabund = validParameter.validFile(parameters, "showabund", false);
300                         if (showabund == "not found") { showabund = "T"; }
301
302                         timing = validParameter.validFile(parameters, "timing", false);
303                         if (timing == "not found") { timing = "F"; }
304                         
305                 }
306         }
307         catch(exception& e) {
308                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
309                 exit(1);
310         }
311 }
312
313 //**********************************************************************************************************************
314
315 int ClusterSplitCommand::execute(){
316         try {
317         
318                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
319                 
320                 time_t estart;
321                 vector<string> listFileNames;
322                 set<string> labels;
323                 string singletonName = "";
324                 double saveCutoff = cutoff;
325
326                 //****************** file prep work ******************************//
327                 #ifdef USE_MPI
328                         int pid;
329                         int tag = 2001;
330                         MPI_Status status; 
331                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
332                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
333                         
334                         if (pid == 0) { //only process 0 converts and splits
335                         
336                 #endif
337                 
338                 //if user gave a phylip file convert to column file
339                 if (format == "phylip") {
340                         estart = time(NULL);
341                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
342                         
343                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
344                         
345                         NameAssignment* nameMap = NULL;
346                         convert->setFormat("phylip");
347                         convert->read(nameMap);
348                         
349                         if (m->control_pressed) {  delete convert;  return 0;  }
350                         
351                         distfile = convert->getOutputFile();
352                 
353                         //if no names file given with phylip file, create it
354                         ListVector* listToMakeNameFile =  convert->getListVector();
355                         if (namefile == "") {  //you need to make a namefile for split matrix
356                                 ofstream out;
357                                 namefile = phylipfile + ".names";
358                                 m->openOutputFile(namefile, out);
359                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
360                                         string bin = listToMakeNameFile->get(i);
361                                         out << bin << '\t' << bin << endl;
362                                 }
363                                 out.close();
364                         }
365                         delete listToMakeNameFile;
366                         delete convert;
367                         
368                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
369                 }
370                 if (m->control_pressed) { return 0; }
371                 
372                 estart = time(NULL);
373                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
374                 
375                 //split matrix into non-overlapping groups
376                 SplitMatrix* split;
377                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
378                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
379                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
380                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
381                 
382                 split->split();
383                 
384                 if (m->control_pressed) { delete split; return 0; }
385                 
386                 singletonName = split->getSingletonNames();
387                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
388                 delete split;
389                 
390                 //output a merged distance file
391                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
392                         
393                                 
394                 if (m->control_pressed) { return 0; }
395                 
396                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
397                 estart = time(NULL);
398                 
399                 //****************** break up files between processes and cluster each file set ******************************//
400         #ifdef USE_MPI
401                         ////you are process 0 from above////
402                         
403                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...                           
404                         dividedNames.resize(processors);
405                                         
406                         //for each file group figure out which process will complete it
407                         //want to divide the load intelligently so the big files are spread between processes
408                         for (int i = 0; i < distName.size(); i++) { 
409                                 int processToAssign = (i+1) % processors; 
410                                 if (processToAssign == 0) { processToAssign = processors; }
411                                                 
412                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
413                         }
414                                         
415                         //not lets reverse the order of ever other process, so we balance big files running with little ones
416                         for (int i = 0; i < processors; i++) {
417                                 int remainder = ((i+1) % processors);
418                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
419                         }
420                         
421                         
422                         //send each child the list of files it needs to process
423                         for(int i = 1; i < processors; i++) { 
424                                 //send number of file pairs
425                                 int num = dividedNames[i].size();
426                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
427                                 
428                                 for (int j = 0; j < num; j++) { //send filenames to process i
429                                         char tempDistFileName[1024];
430                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
431                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
432                                         
433                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
434                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
435                                         
436                                         char tempNameFileName[1024];
437                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
438                                         int lengthName = (dividedNames[i][j].begin()->second).length();
439
440                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
441                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
442                                 }
443                         }
444                         
445                         //process your share
446                         listFileNames = cluster(dividedNames[0], labels);
447                         
448                         //receive the other processes info
449                         for(int i = 1; i < processors; i++) { 
450                                 int num = dividedNames[i].size();
451                                 
452                                 double tempCutoff;
453                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
454                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
455                                 
456                                 //send list filenames to root process
457                                 for (int j = 0; j < num; j++) {  
458                                         int lengthList = 0;
459                                         char tempListFileName[1024];
460                                 
461                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
462                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
463                                 
464                                         string myListFileName = tempListFileName;
465                                         myListFileName = myListFileName.substr(0, lengthList);
466                                         
467                                         listFileNames.push_back(myListFileName);
468                                 }
469                                 
470                                 //send Labels to root process
471                                 int numLabels = 0;
472                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
473                                 
474                                 for (int j = 0; j < numLabels; j++) {  
475                                         int lengthLabel = 0;
476                                         char tempLabel[100];
477                                 
478                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
479                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
480                                 
481                                         string myLabel = tempLabel;
482                                         myLabel = myLabel.substr(0, lengthLabel);
483                         
484                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
485                                 }
486                         }
487                         
488                 }else { //you are a child process
489                         vector < map<string, string> >  myNames;
490                         
491                         //recieve the files you need to process
492                         //receive number of file pairs
493                         int num = 0;
494                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
495                         
496                         myNames.resize(num);
497         
498                         for (int j = 0; j < num; j++) { //receive filenames to process 
499                                 int lengthDist = 0;
500                                 char tempDistFileName[1024];
501                                 
502                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
503                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
504                                 
505                                 string myDistFileName = tempDistFileName;
506                                 myDistFileName = myDistFileName.substr(0, lengthDist);
507                         
508                                 int lengthName = 0;
509                                 char tempNameFileName[1024];
510                                 
511                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
512                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
513                                 
514                                 string myNameFileName = tempNameFileName;
515                                 myNameFileName = myNameFileName.substr(0, lengthName);
516                                 
517                                 //save file name
518                                 myNames[j][myDistFileName] = myNameFileName;
519                         }
520         
521                         //process them
522                         listFileNames = cluster(myNames, labels);
523                         
524                         //send cutoff
525                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
526                         
527                         //send list filenames to root process
528                         for (int j = 0; j < num; j++) {  
529                                 char tempListFileName[1024];
530                                 strcpy(tempListFileName, listFileNames[j].c_str());
531                                 int lengthList = listFileNames[j].length();
532                                         
533                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
534                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
535                         }
536                         
537                         //send Labels to root process
538                         int numLabels = labels.size();
539                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
540                         
541                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
542                                 char tempLabel[100];
543                                 strcpy(tempLabel, (*it).c_str());
544                                 int lengthLabel = (*it).length();
545                                         
546                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
547                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
548                         }
549                 }
550                 
551                 //make everyone wait
552                 MPI_Barrier(MPI_COMM_WORLD);
553                 
554         #else
555
556                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
557                                 if(processors == 1){
558                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
559                                 }else{
560                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
561                                         dividedNames.resize(processors);
562                                         
563                                         //for each file group figure out which process will complete it
564                                         //want to divide the load intelligently so the big files are spread between processes
565                                         for (int i = 0; i < distName.size(); i++) { 
566                                                 int processToAssign = (i+1) % processors; 
567                                                 if (processToAssign == 0) { processToAssign = processors; }
568                                                 
569                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
570                                         }
571                                         
572                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
573                                         for (int i = 0; i < processors; i++) {
574                                                 int remainder = ((i+1) % processors);
575                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
576                                         }
577                                         
578                                         createProcesses(dividedNames);
579                                                         
580                                         if (m->control_pressed) { return 0; }
581
582                                         //get list of list file names from each process
583                                         for(int i=0;i<processors;i++){
584                                                 string filename = toString(processIDS[i]) + ".temp";
585                                                 ifstream in;
586                                                 m->openInputFile(filename, in);
587                                                 
588                                                 in >> tag; m->gobble(in);
589                                                 
590                                                 while(!in.eof()) {
591                                                         string tempName;
592                                                         in >> tempName; m->gobble(in);
593                                                         listFileNames.push_back(tempName);
594                                                 }
595                                                 in.close();
596                                                 remove((toString(processIDS[i]) + ".temp").c_str());
597                                                 
598                                                 //get labels
599                                                 filename = toString(processIDS[i]) + ".temp.labels";
600                                                 ifstream in2;
601                                                 m->openInputFile(filename, in2);
602                                                 
603                                                 float tempCutoff;
604                                                 in2 >> tempCutoff; m->gobble(in2);
605                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
606                                                 
607                                                 while(!in2.eof()) {
608                                                         string tempName;
609                                                         in2 >> tempName; m->gobble(in2);
610                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
611                                                 }
612                                                 in2.close();
613                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
614                                         }
615                                 }
616                 #else
617                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
618                 #endif
619         #endif  
620                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
621                 
622                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
623                 
624                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
625                 
626                 //****************** merge list file and create rabund and sabund files ******************************//
627                 estart = time(NULL);
628                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
629                 
630                 #ifdef USE_MPI
631                         if (pid == 0) { //only process 0 merges
632                 #endif
633
634                 ListVector* listSingle;
635                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
636                 
637                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
638                 
639                 mergeLists(listFileNames, labelBins, listSingle);
640
641                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
642                 
643                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
644                 
645                 //set list file as new current listfile
646                 string current = "";
647                 itTypes = outputTypes.find("list");
648                 if (itTypes != outputTypes.end()) {
649                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
650                 }
651                 
652                 //set rabund file as new current rabundfile
653                 itTypes = outputTypes.find("rabund");
654                 if (itTypes != outputTypes.end()) {
655                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
656                 }
657                 
658                 //set sabund file as new current sabundfile
659                 itTypes = outputTypes.find("sabund");
660                 if (itTypes != outputTypes.end()) {
661                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
662                 }
663                                 
664                 m->mothurOutEndLine();
665                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
666                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
667                 m->mothurOutEndLine();
668                 
669                 #ifdef USE_MPI
670                         } //only process 0 merges
671                         
672                         //make everyone wait
673                         MPI_Barrier(MPI_COMM_WORLD);
674                 #endif
675
676                 return 0;
677         }
678         catch(exception& e) {
679                 m->errorOut(e, "ClusterSplitCommand", "execute");
680                 exit(1);
681         }
682 }
683 //**********************************************************************************************************************
684 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
685         try {
686                                 
687                 map<float, int> labelBin;
688                 vector<float> orderFloat;
689                 int numSingleBins;
690                 
691                 //read in singletons
692                 if (singleton != "none") {
693                         ifstream in;
694                         m->openInputFile(singleton, in);
695                                 
696                         string firstCol, secondCol;
697                         listSingle = new ListVector();
698                         while (!in.eof()) {
699                                 in >> firstCol >> secondCol; m->gobble(in);
700                                 listSingle->push_back(secondCol);
701                         }
702                         in.close();
703                         remove(singleton.c_str());
704                         
705                         numSingleBins = listSingle->getNumBins();
706                 }else{  listSingle = NULL; numSingleBins = 0;  }
707                 
708                 //go through users set and make them floats so we can sort them 
709                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
710                         float temp = -10.0;
711
712                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
713                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
714                         
715                         if (temp <= cutoff) {
716                                 orderFloat.push_back(temp);
717                                 labelBin[temp] = numSingleBins; //initialize numbins 
718                         }
719                 }
720         
721                 //sort order
722                 sort(orderFloat.begin(), orderFloat.end());
723                 userLabels.clear();
724                         
725                 //get the list info from each file
726                 for (int k = 0; k < listNames.size(); k++) {
727         
728                         if (m->control_pressed) {  
729                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
730                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
731                                 return labelBin;
732                         }
733                         
734                         InputData* input = new InputData(listNames[k], "list");
735                         ListVector* list = input->getListVector();
736                         string lastLabel = list->getLabel();
737                         
738                         string filledInList = listNames[k] + "filledInTemp";
739                         ofstream outFilled;
740                         m->openOutputFile(filledInList, outFilled);
741         
742                         //for each label needed
743                         for(int l = 0; l < orderFloat.size(); l++){
744                         
745                                 string thisLabel;
746                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
747                                 else { thisLabel = toString(orderFloat[l],  length-1);  } 
748
749                                 //this file has reached the end
750                                 if (list == NULL) { 
751                                         list = input->getListVector(lastLabel, true); 
752                                 }else{  //do you have the distance, or do you need to fill in
753                                                 
754                                         float labelFloat;
755                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
756                                         else { convert(list->getLabel(), labelFloat); }
757
758                                         //check for missing labels
759                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
760                                                 //if its bigger get last label, otherwise keep it
761                                                 delete list;
762                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
763                                         }
764                                         lastLabel = list->getLabel();
765                                 }
766                                 
767                                 //print to new file
768                                 list->setLabel(thisLabel);
769                                 list->print(outFilled);
770                 
771                                 //update labelBin
772                                 labelBin[orderFloat[l]] += list->getNumBins();
773                                                                         
774                                 delete list;
775                                                                         
776                                 list = input->getListVector();
777                         }
778                         
779                         if (list != NULL) { delete list; }
780                         delete input;
781                         
782                         outFilled.close();
783                         remove(listNames[k].c_str());
784                         rename(filledInList.c_str(), listNames[k].c_str());
785                 }
786                 
787                 return labelBin;
788         }
789         catch(exception& e) {
790                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
791                 exit(1);
792         }
793 }
794 //**********************************************************************************************************************
795 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
796         try {
797                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
798                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
799                 
800                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
801                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
802                 m->openOutputFile(fileroot+ tag + ".list",              outList);
803                                 
804                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
805                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
806                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
807                 
808                 map<float, int>::iterator itLabel;
809
810                 //for each label needed
811                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
812                         
813                         string thisLabel;
814                         if (itLabel->first == -1) { thisLabel = "unique"; }
815                         else { thisLabel = toString(itLabel->first,  length-1);  } 
816                         
817                         outList << thisLabel << '\t' << itLabel->second << '\t';
818
819                         RAbundVector* rabund = new RAbundVector();
820                         rabund->setLabel(thisLabel);
821
822                         //add in singletons
823                         if (listSingle != NULL) {
824                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
825                                         outList << listSingle->get(j) << '\t';
826                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
827                                 }
828                         }
829                         
830                         //get the list info from each file
831                         for (int k = 0; k < listNames.size(); k++) {
832         
833                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
834                                 
835                                 InputData* input = new InputData(listNames[k], "list");
836                                 ListVector* list = input->getListVector(thisLabel);
837                                 
838                                 //this file has reached the end
839                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
840                                 else {          
841                                         for (int j = 0; j < list->getNumBins(); j++) {
842                                                 outList << list->get(j) << '\t';
843                                                 rabund->push_back(m->getNumNames(list->get(j)));
844                                         }
845                                         delete list;
846                                 }
847                                 delete input;
848                         }
849                         
850                         SAbundVector sabund = rabund->getSAbundVector();
851                         
852                         sabund.print(outSabund);
853                         rabund->print(outRabund);
854                         outList << endl;
855                         
856                         delete rabund;
857                 }
858                 
859                 outList.close();
860                 outRabund.close();
861                 outSabund.close();
862                 
863                 if (listSingle != NULL) { delete listSingle;  }
864                 
865                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
866                 
867                 return 0;
868         }
869         catch(exception& e) {
870                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
871                 exit(1);
872         }
873 }
874
875 //**********************************************************************************************************************
876
877 void ClusterSplitCommand::printData(ListVector* oldList){
878         try {
879                 string label = oldList->getLabel();
880                 RAbundVector oldRAbund = oldList->getRAbundVector();
881                 
882                 oldRAbund.setLabel(label);
883                 if (m->isTrue(showabund)) {
884                         oldRAbund.getSAbundVector().print(cout);
885                 }
886                 oldRAbund.print(outRabund);
887                 oldRAbund.getSAbundVector().print(outSabund);
888         
889                 oldList->print(outList);
890         }
891         catch(exception& e) {
892                 m->errorOut(e, "ClusterSplitCommand", "printData");
893                 exit(1);
894         }
895 }
896 //**********************************************************************************************************************
897 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
898         try {
899         
900         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
901                 int process = 0;
902                 int exitCommand = 1;
903                 processIDS.clear();
904                 
905                 //loop through and create all the processes you want
906                 while (process != processors) {
907                         int pid = fork();
908                         
909                         if (pid > 0) {
910                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
911                                 process++;
912                         }else if (pid == 0){
913                                 set<string> labels;
914                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
915                                 
916                                 //write out names to file
917                                 string filename = toString(getpid()) + ".temp";
918                                 ofstream out;
919                                 m->openOutputFile(filename, out);
920                                 out << tag << endl;
921                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
922                                 out.close();
923                                 
924                                 //print out labels
925                                 ofstream outLabels;
926                                 filename = toString(getpid()) + ".temp.labels";
927                                 m->openOutputFile(filename, outLabels);
928                                 
929                                 outLabels << cutoff << endl;
930                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
931                                         outLabels << (*it) << endl;
932                                 }
933                                 outLabels.close();
934
935                                 exit(0);
936                         }else { 
937                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
938                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
939                                 exit(0);
940                         }
941                 }
942                 
943                 //force parent to wait until all the processes are done
944                 for (int i=0;i<processors;i++) { 
945                         int temp = processIDS[i];
946                         wait(&temp);
947                 }
948                 
949                 return exitCommand;
950         #endif          
951         
952         }
953         catch(exception& e) {
954                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
955                 exit(1);
956         }
957 }
958 //**********************************************************************************************************************
959
960 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
961         try {
962                 Cluster* cluster;
963                 SparseMatrix* matrix;
964                 ListVector* list;
965                 ListVector oldList;
966                 RAbundVector* rabund;
967                 
968                 vector<string> listFileNames;
969                 
970                 double smallestCutoff = cutoff;
971                 
972                 //cluster each distance file
973                 for (int i = 0; i < distNames.size(); i++) {
974                         if (m->control_pressed) { return listFileNames; }
975                         
976                         string thisNamefile = distNames[i].begin()->second;
977                         string thisDistFile = distNames[i].begin()->first;
978                                                 
979                         #ifdef USE_MPI
980                                 int pid;
981                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
982                                 
983                                 //output your files too
984                                 if (pid != 0) {
985                                         cout << endl << "Reading " << thisDistFile << endl;
986                                 }
987                         #endif
988                         
989                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
990                         
991                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
992                         read->setCutoff(cutoff);
993
994                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
995                         nameMap->readMap();
996                         read->read(nameMap);
997                         
998                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
999                         
1000                         list = read->getListVector();
1001                         oldList = *list;
1002                         matrix = read->getMatrix();
1003                         
1004                         delete read; 
1005                         delete nameMap; 
1006                         
1007                         
1008                         #ifdef USE_MPI
1009                                 //output your files too
1010                                 if (pid != 0) {
1011                                         cout << endl << "Clustering " << thisDistFile << endl;
1012                                 }
1013                         #endif
1014                         
1015                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1016                 
1017                         rabund = new RAbundVector(list->getRAbundVector());
1018                         
1019                         //create cluster
1020                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1021                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1022                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1023                         tag = cluster->getTag();
1024                 
1025                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1026                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1027                         
1028                         ofstream listFile;
1029                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1030                 
1031                         listFileNames.push_back(fileroot+ tag + ".list");
1032                                 
1033                         float previousDist = 0.00000;
1034                         float rndPreviousDist = 0.00000;
1035                         
1036                         oldList = *list;
1037
1038                         print_start = true;
1039                         start = time(NULL);
1040                         double saveCutoff = cutoff;
1041                 
1042                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1043                 
1044                                 if (m->control_pressed) { //clean up
1045                                         delete matrix; delete list;     delete cluster; delete rabund;
1046                                         listFile.close();
1047                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1048                                         listFileNames.clear(); return listFileNames;
1049                                 }
1050                 
1051                                 cluster->update(saveCutoff);
1052         
1053                                 float dist = matrix->getSmallDist();
1054                                 float rndDist;
1055                                 if (hard) {
1056                                         rndDist = m->ceilDist(dist, precision); 
1057                                 }else{
1058                                         rndDist = m->roundDist(dist, precision); 
1059                                 }
1060
1061                                 if(previousDist <= 0.0000 && dist != previousDist){
1062                                         oldList.setLabel("unique");
1063                                         oldList.print(listFile);
1064                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1065                                 }
1066                                 else if(rndDist != rndPreviousDist){
1067                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1068                                         oldList.print(listFile);
1069                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1070                                 }
1071                 
1072                                 previousDist = dist;
1073                                 rndPreviousDist = rndDist;
1074                                 oldList = *list;
1075                         }
1076
1077                 
1078                         if(previousDist <= 0.0000){
1079                                 oldList.setLabel("unique");
1080                                 oldList.print(listFile);
1081                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1082                         }
1083                         else if(rndPreviousDist<cutoff){
1084                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1085                                 oldList.print(listFile);
1086                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1087                         }
1088         
1089                         delete matrix; delete list;     delete cluster; delete rabund; 
1090                         listFile.close();
1091                         
1092                         if (m->control_pressed) { //clean up
1093                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1094                                 listFileNames.clear(); return listFileNames;
1095                         }
1096                         
1097                         remove(thisDistFile.c_str());
1098                         remove(thisNamefile.c_str());
1099                         
1100                         if (saveCutoff != cutoff) { 
1101                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1102                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1103                         
1104                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
1105                         }
1106                         
1107                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1108                 }
1109                 
1110                 cutoff = smallestCutoff;
1111                                         
1112                 return listFileNames;
1113         
1114         }
1115         catch(exception& e) {
1116                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1117                 exit(1);
1118         }
1119
1120
1121 }
1122 //**********************************************************************************************************************
1123
1124 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1125         try{
1126                 
1127 #ifdef USE_MPI
1128                 int pid;
1129                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1130                 
1131                 if (pid != 0) {
1132 #endif
1133                 
1134                 string thisOutputDir = outputDir;
1135                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1136                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1137                 remove(outputFileName.c_str());
1138                 
1139                 
1140                 for (int i = 0; i < distNames.size(); i++) {
1141                         if (m->control_pressed) {  return 0; }
1142                         
1143                         string thisDistFile = distNames[i].begin()->first;
1144                         
1145                         m->appendFiles(thisDistFile, outputFileName);
1146                 }       
1147                         
1148                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1149                         
1150 #ifdef USE_MPI
1151                 }
1152 #endif
1153                                 
1154                 return 0;       
1155                 
1156                 
1157         }
1158         catch(exception& e) {
1159                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1160                 exit(1);
1161         }
1162 }
1163 //**********************************************************************************************************************