]> git.donarmstrong.com Git - mothur.git/blob - clustersplitcommand.cpp
added citation function to commands
[mothur.git] / clustersplitcommand.cpp
1 /*
2  *  clustersplitcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clustersplitcommand.h"
11 #include "readcluster.h"
12 #include "splitmatrix.h"
13 #include "readphylip.h"
14 #include "readcolumn.h"
15 #include "readmatrix.hpp"
16 #include "inputdata.h"
17
18
19 //**********************************************************************************************************************
20 vector<string> ClusterSplitCommand::setParameters(){    
21         try {
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
33                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
39                         
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ClusterSplitCommand::getHelpString(){    
51         try {
52                 string helpString = "";
53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
70 #ifdef USE_MPI
71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
72 #endif
73                 helpString += "The cluster.split command should be in the following format: \n";
74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";       
76                 return helpString;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 ClusterSplitCommand::ClusterSplitCommand(){     
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["list"] = tempOutNames;
90                 outputTypes["rabund"] = tempOutNames;
91                 outputTypes["sabund"] = tempOutNames;
92                 outputTypes["column"] = tempOutNames;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
96                 exit(1);
97         }
98 }
99 //**********************************************************************************************************************
100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
102         try{
103                 abort = false; calledHelp = false;   
104                 format = "";
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string,string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter("cluster.split");
117                 
118                         //check to make sure all parameters are valid for command
119                         map<string,string>::iterator it;
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
122                                         abort = true;
123                                 }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["list"] = tempOutNames;
129                         outputTypes["rabund"] = tempOutNames;
130                         outputTypes["sabund"] = tempOutNames;
131                         outputTypes["column"] = tempOutNames;
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
135                         
136                                 //if the user changes the input directory command factory will send this info to us in the output parameter 
137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
138                         if (inputDir == "not found"){   inputDir = "";          }
139                         else {
140                                 string path;
141                                 it = parameters.find("phylip");
142                                 //user has given a template file
143                                 if(it != parameters.end()){ 
144                                         path = m->hasPath(it->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
147                                 }
148                                 
149                                 it = parameters.find("column");
150                                 //user has given a template file
151                                 if(it != parameters.end()){ 
152                                         path = m->hasPath(it->second);
153                                         //if the user has not given a path then, add inputdir. else leave path alone.
154                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
155                                 }
156                                 
157                                 it = parameters.find("name");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
163                                 }
164                                 
165                                 it = parameters.find("taxonomy");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
171                                 }
172                                 
173                                 it = parameters.find("fasta");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
179                                 }
180                         }
181                         
182                         //check for required parameters
183                         phylipfile = validParameter.validFile(parameters, "phylip", true);
184                         if (phylipfile == "not open") { abort = true; }
185                         else if (phylipfile == "not found") { phylipfile = ""; }        
186                         else {  distfile = phylipfile;  format = "phylip";      }
187                         
188                         columnfile = validParameter.validFile(parameters, "column", true);
189                         if (columnfile == "not open") { abort = true; } 
190                         else if (columnfile == "not found") { columnfile = ""; }
191                         else {  distfile = columnfile; format = "column";       }
192                         
193                         namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not open") { abort = true; }   
195                         else if (namefile == "not found") { namefile = ""; }
196                         
197                         fastafile = validParameter.validFile(parameters, "fasta", true);
198                         if (fastafile == "not open") { abort = true; }  
199                         else if (fastafile == "not found") { fastafile = ""; }
200                         else { distfile = fastafile;  splitmethod = "fasta";  }
201                         
202                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
203                         if (taxFile == "not open") { abort = true; }    
204                         else if (taxFile == "not found") { taxFile = ""; }
205                         
206                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { 
207                                 //is there are current file available for either of these?
208                                 //give priority to column, then phylip, then fasta
209                                 columnfile = m->getColumnFile(); 
210                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
211                                 else { 
212                                         phylipfile = m->getPhylipFile(); 
213                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
214                                         else { 
215                                                 fastafile = m->getFastaFile(); 
216                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
217                                                 else { 
218                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); 
219                                                         abort = true; 
220                                                 }
221                                         }
222                                 }
223                         }
224                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
225                 
226                         if (columnfile != "") {
227                                 if (namefile == "") { 
228                                         namefile = m->getNameFile(); 
229                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
230                                         else { 
231                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
232                                                 abort = true; 
233                                         }       
234                                 }
235                         }
236                         
237                         if (fastafile != "") {
238                                 if (taxFile == "") { 
239                                         taxFile = m->getTaxonomyFile(); 
240                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
241                                         else { 
242                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
243                                                 abort = true; 
244                                         }       
245                                 }
246                                 
247                                 if (namefile == "") { 
248                                         namefile = m->getNameFile(); 
249                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
250                                         else { 
251                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
252                                                 abort = true; 
253                                         }       
254                                 }
255                         }
256                                         
257                         //check for optional parameter and set defaults
258                         // ...at some point should added some additional type checking...
259                         //get user cutoff and precision or use defaults
260                         string temp;
261                         temp = validParameter.validFile(parameters, "precision", false);
262                         if (temp == "not found") { temp = "100"; }
263                         //saves precision legnth for formatting below
264                         length = temp.length();
265                         convert(temp, precision); 
266                         
267                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
268                         hard = m->isTrue(temp);
269                         
270                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
271                         large = m->isTrue(temp);
272                         
273                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
274                         m->setProcessors(temp);
275                         convert(temp, processors);
276                         
277                         temp = validParameter.validFile(parameters, "splitmethod", false);      
278                         if (splitmethod != "fasta") {
279                                 if (temp == "not found")  { splitmethod = "distance"; }
280                                 else {  splitmethod = temp; }
281                         }
282                         
283                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
284                         convert(temp, cutoff); 
285                         cutoff += (5 / (precision * 10.0));  
286                         
287                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
288                         convert(temp, taxLevelCutoff); 
289                         
290                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
291                         
292                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
293                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
294                         
295                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
296                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
297                         
298                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
299
300                         showabund = validParameter.validFile(parameters, "showabund", false);
301                         if (showabund == "not found") { showabund = "T"; }
302
303                         timing = validParameter.validFile(parameters, "timing", false);
304                         if (timing == "not found") { timing = "F"; }
305                         
306                 }
307         }
308         catch(exception& e) {
309                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
310                 exit(1);
311         }
312 }
313
314 //**********************************************************************************************************************
315
316 int ClusterSplitCommand::execute(){
317         try {
318         
319                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
320                 
321                 time_t estart;
322                 vector<string> listFileNames;
323                 set<string> labels;
324                 string singletonName = "";
325                 double saveCutoff = cutoff;
326
327                 //****************** file prep work ******************************//
328                 #ifdef USE_MPI
329                         int pid;
330                         int tag = 2001;
331                         MPI_Status status; 
332                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
333                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
334                         
335                         if (pid == 0) { //only process 0 converts and splits
336                         
337                 #endif
338                 
339                 //if user gave a phylip file convert to column file
340                 if (format == "phylip") {
341                         estart = time(NULL);
342                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
343                         
344                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
345                         
346                         NameAssignment* nameMap = NULL;
347                         convert->setFormat("phylip");
348                         convert->read(nameMap);
349                         
350                         if (m->control_pressed) {  delete convert;  return 0;  }
351                         
352                         distfile = convert->getOutputFile();
353                 
354                         //if no names file given with phylip file, create it
355                         ListVector* listToMakeNameFile =  convert->getListVector();
356                         if (namefile == "") {  //you need to make a namefile for split matrix
357                                 ofstream out;
358                                 namefile = phylipfile + ".names";
359                                 m->openOutputFile(namefile, out);
360                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
361                                         string bin = listToMakeNameFile->get(i);
362                                         out << bin << '\t' << bin << endl;
363                                 }
364                                 out.close();
365                         }
366                         delete listToMakeNameFile;
367                         delete convert;
368                         
369                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
370                 }
371                 if (m->control_pressed) { return 0; }
372                 
373                 estart = time(NULL);
374                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
375                 
376                 //split matrix into non-overlapping groups
377                 SplitMatrix* split;
378                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
379                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
380                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
381                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
382                 
383                 split->split();
384                 
385                 if (m->control_pressed) { delete split; return 0; }
386                 
387                 singletonName = split->getSingletonNames();
388                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
389                 delete split;
390                 
391                 //output a merged distance file
392                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
393                         
394                                 
395                 if (m->control_pressed) { return 0; }
396                 
397                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
398                 estart = time(NULL);
399                 
400                 //****************** break up files between processes and cluster each file set ******************************//
401         #ifdef USE_MPI
402                         ////you are process 0 from above////
403                         
404                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...                           
405                         dividedNames.resize(processors);
406                                         
407                         //for each file group figure out which process will complete it
408                         //want to divide the load intelligently so the big files are spread between processes
409                         for (int i = 0; i < distName.size(); i++) { 
410                                 int processToAssign = (i+1) % processors; 
411                                 if (processToAssign == 0) { processToAssign = processors; }
412                                                 
413                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
414                         }
415                                         
416                         //not lets reverse the order of ever other process, so we balance big files running with little ones
417                         for (int i = 0; i < processors; i++) {
418                                 int remainder = ((i+1) % processors);
419                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
420                         }
421                         
422                         
423                         //send each child the list of files it needs to process
424                         for(int i = 1; i < processors; i++) { 
425                                 //send number of file pairs
426                                 int num = dividedNames[i].size();
427                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
428                                 
429                                 for (int j = 0; j < num; j++) { //send filenames to process i
430                                         char tempDistFileName[1024];
431                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
432                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
433                                         
434                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
435                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
436                                         
437                                         char tempNameFileName[1024];
438                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
439                                         int lengthName = (dividedNames[i][j].begin()->second).length();
440
441                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
442                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
443                                 }
444                         }
445                         
446                         //process your share
447                         listFileNames = cluster(dividedNames[0], labels);
448                         
449                         //receive the other processes info
450                         for(int i = 1; i < processors; i++) { 
451                                 int num = dividedNames[i].size();
452                                 
453                                 double tempCutoff;
454                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
455                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
456                                 
457                                 //send list filenames to root process
458                                 for (int j = 0; j < num; j++) {  
459                                         int lengthList = 0;
460                                         char tempListFileName[1024];
461                                 
462                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
463                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
464                                 
465                                         string myListFileName = tempListFileName;
466                                         myListFileName = myListFileName.substr(0, lengthList);
467                                         
468                                         listFileNames.push_back(myListFileName);
469                                 }
470                                 
471                                 //send Labels to root process
472                                 int numLabels = 0;
473                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
474                                 
475                                 for (int j = 0; j < numLabels; j++) {  
476                                         int lengthLabel = 0;
477                                         char tempLabel[100];
478                                 
479                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
480                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
481                                 
482                                         string myLabel = tempLabel;
483                                         myLabel = myLabel.substr(0, lengthLabel);
484                         
485                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
486                                 }
487                         }
488                         
489                 }else { //you are a child process
490                         vector < map<string, string> >  myNames;
491                         
492                         //recieve the files you need to process
493                         //receive number of file pairs
494                         int num = 0;
495                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
496                         
497                         myNames.resize(num);
498         
499                         for (int j = 0; j < num; j++) { //receive filenames to process 
500                                 int lengthDist = 0;
501                                 char tempDistFileName[1024];
502                                 
503                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
504                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
505                                 
506                                 string myDistFileName = tempDistFileName;
507                                 myDistFileName = myDistFileName.substr(0, lengthDist);
508                         
509                                 int lengthName = 0;
510                                 char tempNameFileName[1024];
511                                 
512                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
513                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
514                                 
515                                 string myNameFileName = tempNameFileName;
516                                 myNameFileName = myNameFileName.substr(0, lengthName);
517                                 
518                                 //save file name
519                                 myNames[j][myDistFileName] = myNameFileName;
520                         }
521         
522                         //process them
523                         listFileNames = cluster(myNames, labels);
524                         
525                         //send cutoff
526                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
527                         
528                         //send list filenames to root process
529                         for (int j = 0; j < num; j++) {  
530                                 char tempListFileName[1024];
531                                 strcpy(tempListFileName, listFileNames[j].c_str());
532                                 int lengthList = listFileNames[j].length();
533                                         
534                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
535                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
536                         }
537                         
538                         //send Labels to root process
539                         int numLabels = labels.size();
540                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
541                         
542                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
543                                 char tempLabel[100];
544                                 strcpy(tempLabel, (*it).c_str());
545                                 int lengthLabel = (*it).length();
546                                         
547                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
548                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
549                         }
550                 }
551                 
552                 //make everyone wait
553                 MPI_Barrier(MPI_COMM_WORLD);
554                 
555         #else
556
557                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
558                                 if(processors == 1){
559                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
560                                 }else{
561                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
562                                         dividedNames.resize(processors);
563                                         
564                                         //for each file group figure out which process will complete it
565                                         //want to divide the load intelligently so the big files are spread between processes
566                                         for (int i = 0; i < distName.size(); i++) { 
567                                                 int processToAssign = (i+1) % processors; 
568                                                 if (processToAssign == 0) { processToAssign = processors; }
569                                                 
570                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
571                                         }
572                                         
573                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
574                                         for (int i = 0; i < processors; i++) {
575                                                 int remainder = ((i+1) % processors);
576                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
577                                         }
578                                         
579                                         createProcesses(dividedNames);
580                                                         
581                                         if (m->control_pressed) { return 0; }
582
583                                         //get list of list file names from each process
584                                         for(int i=0;i<processors;i++){
585                                                 string filename = toString(processIDS[i]) + ".temp";
586                                                 ifstream in;
587                                                 m->openInputFile(filename, in);
588                                                 
589                                                 in >> tag; m->gobble(in);
590                                                 
591                                                 while(!in.eof()) {
592                                                         string tempName;
593                                                         in >> tempName; m->gobble(in);
594                                                         listFileNames.push_back(tempName);
595                                                 }
596                                                 in.close();
597                                                 remove((toString(processIDS[i]) + ".temp").c_str());
598                                                 
599                                                 //get labels
600                                                 filename = toString(processIDS[i]) + ".temp.labels";
601                                                 ifstream in2;
602                                                 m->openInputFile(filename, in2);
603                                                 
604                                                 float tempCutoff;
605                                                 in2 >> tempCutoff; m->gobble(in2);
606                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
607                                                 
608                                                 while(!in2.eof()) {
609                                                         string tempName;
610                                                         in2 >> tempName; m->gobble(in2);
611                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
612                                                 }
613                                                 in2.close();
614                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
615                                         }
616                                 }
617                 #else
618                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
619                 #endif
620         #endif  
621                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
622                 
623                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
624                 
625                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
626                 
627                 //****************** merge list file and create rabund and sabund files ******************************//
628                 estart = time(NULL);
629                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
630                 
631                 #ifdef USE_MPI
632                         if (pid == 0) { //only process 0 merges
633                 #endif
634
635                 ListVector* listSingle;
636                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
637                 
638                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
639                 
640                 mergeLists(listFileNames, labelBins, listSingle);
641
642                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
643                 
644                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
645                 
646                 //set list file as new current listfile
647                 string current = "";
648                 itTypes = outputTypes.find("list");
649                 if (itTypes != outputTypes.end()) {
650                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
651                 }
652                 
653                 //set rabund file as new current rabundfile
654                 itTypes = outputTypes.find("rabund");
655                 if (itTypes != outputTypes.end()) {
656                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
657                 }
658                 
659                 //set sabund file as new current sabundfile
660                 itTypes = outputTypes.find("sabund");
661                 if (itTypes != outputTypes.end()) {
662                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
663                 }
664                                 
665                 m->mothurOutEndLine();
666                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
667                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
668                 m->mothurOutEndLine();
669                 
670                 #ifdef USE_MPI
671                         } //only process 0 merges
672                         
673                         //make everyone wait
674                         MPI_Barrier(MPI_COMM_WORLD);
675                 #endif
676
677                 return 0;
678         }
679         catch(exception& e) {
680                 m->errorOut(e, "ClusterSplitCommand", "execute");
681                 exit(1);
682         }
683 }
684 //**********************************************************************************************************************
685 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
686         try {
687                                 
688                 map<float, int> labelBin;
689                 vector<float> orderFloat;
690                 int numSingleBins;
691                 
692                 //read in singletons
693                 if (singleton != "none") {
694                         ifstream in;
695                         m->openInputFile(singleton, in);
696                                 
697                         string firstCol, secondCol;
698                         listSingle = new ListVector();
699                         while (!in.eof()) {
700                                 in >> firstCol >> secondCol; m->gobble(in);
701                                 listSingle->push_back(secondCol);
702                         }
703                         in.close();
704                         remove(singleton.c_str());
705                         
706                         numSingleBins = listSingle->getNumBins();
707                 }else{  listSingle = NULL; numSingleBins = 0;  }
708                 
709                 //go through users set and make them floats so we can sort them 
710                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
711                         float temp = -10.0;
712
713                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
714                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
715                         
716                         if (temp <= cutoff) {
717                                 orderFloat.push_back(temp);
718                                 labelBin[temp] = numSingleBins; //initialize numbins 
719                         }
720                 }
721         
722                 //sort order
723                 sort(orderFloat.begin(), orderFloat.end());
724                 userLabels.clear();
725                         
726                 //get the list info from each file
727                 for (int k = 0; k < listNames.size(); k++) {
728         
729                         if (m->control_pressed) {  
730                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
731                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
732                                 return labelBin;
733                         }
734                         
735                         InputData* input = new InputData(listNames[k], "list");
736                         ListVector* list = input->getListVector();
737                         string lastLabel = list->getLabel();
738                         
739                         string filledInList = listNames[k] + "filledInTemp";
740                         ofstream outFilled;
741                         m->openOutputFile(filledInList, outFilled);
742         
743                         //for each label needed
744                         for(int l = 0; l < orderFloat.size(); l++){
745                         
746                                 string thisLabel;
747                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
748                                 else { thisLabel = toString(orderFloat[l],  length-1);  } 
749
750                                 //this file has reached the end
751                                 if (list == NULL) { 
752                                         list = input->getListVector(lastLabel, true); 
753                                 }else{  //do you have the distance, or do you need to fill in
754                                                 
755                                         float labelFloat;
756                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
757                                         else { convert(list->getLabel(), labelFloat); }
758
759                                         //check for missing labels
760                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
761                                                 //if its bigger get last label, otherwise keep it
762                                                 delete list;
763                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
764                                         }
765                                         lastLabel = list->getLabel();
766                                 }
767                                 
768                                 //print to new file
769                                 list->setLabel(thisLabel);
770                                 list->print(outFilled);
771                 
772                                 //update labelBin
773                                 labelBin[orderFloat[l]] += list->getNumBins();
774                                                                         
775                                 delete list;
776                                                                         
777                                 list = input->getListVector();
778                         }
779                         
780                         if (list != NULL) { delete list; }
781                         delete input;
782                         
783                         outFilled.close();
784                         remove(listNames[k].c_str());
785                         rename(filledInList.c_str(), listNames[k].c_str());
786                 }
787                 
788                 return labelBin;
789         }
790         catch(exception& e) {
791                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
792                 exit(1);
793         }
794 }
795 //**********************************************************************************************************************
796 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
797         try {
798                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
799                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
800                 
801                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
802                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
803                 m->openOutputFile(fileroot+ tag + ".list",              outList);
804                                 
805                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
806                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
807                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
808                 
809                 map<float, int>::iterator itLabel;
810
811                 //for each label needed
812                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
813                         
814                         string thisLabel;
815                         if (itLabel->first == -1) { thisLabel = "unique"; }
816                         else { thisLabel = toString(itLabel->first,  length-1);  } 
817                         
818                         outList << thisLabel << '\t' << itLabel->second << '\t';
819
820                         RAbundVector* rabund = new RAbundVector();
821                         rabund->setLabel(thisLabel);
822
823                         //add in singletons
824                         if (listSingle != NULL) {
825                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
826                                         outList << listSingle->get(j) << '\t';
827                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
828                                 }
829                         }
830                         
831                         //get the list info from each file
832                         for (int k = 0; k < listNames.size(); k++) {
833         
834                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
835                                 
836                                 InputData* input = new InputData(listNames[k], "list");
837                                 ListVector* list = input->getListVector(thisLabel);
838                                 
839                                 //this file has reached the end
840                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
841                                 else {          
842                                         for (int j = 0; j < list->getNumBins(); j++) {
843                                                 outList << list->get(j) << '\t';
844                                                 rabund->push_back(m->getNumNames(list->get(j)));
845                                         }
846                                         delete list;
847                                 }
848                                 delete input;
849                         }
850                         
851                         SAbundVector sabund = rabund->getSAbundVector();
852                         
853                         sabund.print(outSabund);
854                         rabund->print(outRabund);
855                         outList << endl;
856                         
857                         delete rabund;
858                 }
859                 
860                 outList.close();
861                 outRabund.close();
862                 outSabund.close();
863                 
864                 if (listSingle != NULL) { delete listSingle;  }
865                 
866                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
867                 
868                 return 0;
869         }
870         catch(exception& e) {
871                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
872                 exit(1);
873         }
874 }
875
876 //**********************************************************************************************************************
877
878 void ClusterSplitCommand::printData(ListVector* oldList){
879         try {
880                 string label = oldList->getLabel();
881                 RAbundVector oldRAbund = oldList->getRAbundVector();
882                 
883                 oldRAbund.setLabel(label);
884                 if (m->isTrue(showabund)) {
885                         oldRAbund.getSAbundVector().print(cout);
886                 }
887                 oldRAbund.print(outRabund);
888                 oldRAbund.getSAbundVector().print(outSabund);
889         
890                 oldList->print(outList);
891         }
892         catch(exception& e) {
893                 m->errorOut(e, "ClusterSplitCommand", "printData");
894                 exit(1);
895         }
896 }
897 //**********************************************************************************************************************
898 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
899         try {
900         
901         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
902                 int process = 0;
903                 int exitCommand = 1;
904                 processIDS.clear();
905                 
906                 //loop through and create all the processes you want
907                 while (process != processors) {
908                         int pid = fork();
909                         
910                         if (pid > 0) {
911                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
912                                 process++;
913                         }else if (pid == 0){
914                                 set<string> labels;
915                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
916                                 
917                                 //write out names to file
918                                 string filename = toString(getpid()) + ".temp";
919                                 ofstream out;
920                                 m->openOutputFile(filename, out);
921                                 out << tag << endl;
922                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
923                                 out.close();
924                                 
925                                 //print out labels
926                                 ofstream outLabels;
927                                 filename = toString(getpid()) + ".temp.labels";
928                                 m->openOutputFile(filename, outLabels);
929                                 
930                                 outLabels << cutoff << endl;
931                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
932                                         outLabels << (*it) << endl;
933                                 }
934                                 outLabels.close();
935
936                                 exit(0);
937                         }else { 
938                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
939                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
940                                 exit(0);
941                         }
942                 }
943                 
944                 //force parent to wait until all the processes are done
945                 for (int i=0;i<processors;i++) { 
946                         int temp = processIDS[i];
947                         wait(&temp);
948                 }
949                 
950                 return exitCommand;
951         #endif          
952         
953         }
954         catch(exception& e) {
955                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
956                 exit(1);
957         }
958 }
959 //**********************************************************************************************************************
960
961 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
962         try {
963                 Cluster* cluster;
964                 SparseMatrix* matrix;
965                 ListVector* list;
966                 ListVector oldList;
967                 RAbundVector* rabund;
968                 
969                 vector<string> listFileNames;
970                 
971                 double smallestCutoff = cutoff;
972                 
973                 //cluster each distance file
974                 for (int i = 0; i < distNames.size(); i++) {
975                         if (m->control_pressed) { return listFileNames; }
976                         
977                         string thisNamefile = distNames[i].begin()->second;
978                         string thisDistFile = distNames[i].begin()->first;
979                                                 
980                         #ifdef USE_MPI
981                                 int pid;
982                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
983                                 
984                                 //output your files too
985                                 if (pid != 0) {
986                                         cout << endl << "Reading " << thisDistFile << endl;
987                                 }
988                         #endif
989                         
990                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
991                         
992                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
993                         read->setCutoff(cutoff);
994
995                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
996                         nameMap->readMap();
997                         read->read(nameMap);
998                         
999                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
1000                         
1001                         list = read->getListVector();
1002                         oldList = *list;
1003                         matrix = read->getMatrix();
1004                         
1005                         delete read; 
1006                         delete nameMap; 
1007                         
1008                         
1009                         #ifdef USE_MPI
1010                                 //output your files too
1011                                 if (pid != 0) {
1012                                         cout << endl << "Clustering " << thisDistFile << endl;
1013                                 }
1014                         #endif
1015                         
1016                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1017                 
1018                         rabund = new RAbundVector(list->getRAbundVector());
1019                         
1020                         //create cluster
1021                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1022                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1023                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1024                         tag = cluster->getTag();
1025                 
1026                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1027                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1028                         
1029                         ofstream listFile;
1030                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1031                 
1032                         listFileNames.push_back(fileroot+ tag + ".list");
1033                                 
1034                         float previousDist = 0.00000;
1035                         float rndPreviousDist = 0.00000;
1036                         
1037                         oldList = *list;
1038
1039                         print_start = true;
1040                         start = time(NULL);
1041                         double saveCutoff = cutoff;
1042                 
1043                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1044                 
1045                                 if (m->control_pressed) { //clean up
1046                                         delete matrix; delete list;     delete cluster; delete rabund;
1047                                         listFile.close();
1048                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1049                                         listFileNames.clear(); return listFileNames;
1050                                 }
1051                 
1052                                 cluster->update(saveCutoff);
1053         
1054                                 float dist = matrix->getSmallDist();
1055                                 float rndDist;
1056                                 if (hard) {
1057                                         rndDist = m->ceilDist(dist, precision); 
1058                                 }else{
1059                                         rndDist = m->roundDist(dist, precision); 
1060                                 }
1061
1062                                 if(previousDist <= 0.0000 && dist != previousDist){
1063                                         oldList.setLabel("unique");
1064                                         oldList.print(listFile);
1065                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1066                                 }
1067                                 else if(rndDist != rndPreviousDist){
1068                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1069                                         oldList.print(listFile);
1070                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1071                                 }
1072                 
1073                                 previousDist = dist;
1074                                 rndPreviousDist = rndDist;
1075                                 oldList = *list;
1076                         }
1077
1078                 
1079                         if(previousDist <= 0.0000){
1080                                 oldList.setLabel("unique");
1081                                 oldList.print(listFile);
1082                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1083                         }
1084                         else if(rndPreviousDist<cutoff){
1085                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1086                                 oldList.print(listFile);
1087                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1088                         }
1089         
1090                         delete matrix; delete list;     delete cluster; delete rabund; 
1091                         listFile.close();
1092                         
1093                         if (m->control_pressed) { //clean up
1094                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1095                                 listFileNames.clear(); return listFileNames;
1096                         }
1097                         
1098                         remove(thisDistFile.c_str());
1099                         remove(thisNamefile.c_str());
1100                         
1101                         if (saveCutoff != cutoff) { 
1102                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1103                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1104                         
1105                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
1106                         }
1107                         
1108                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1109                 }
1110                 
1111                 cutoff = smallestCutoff;
1112                                         
1113                 return listFileNames;
1114         
1115         }
1116         catch(exception& e) {
1117                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1118                 exit(1);
1119         }
1120
1121
1122 }
1123 //**********************************************************************************************************************
1124
1125 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1126         try{
1127                 
1128 #ifdef USE_MPI
1129                 int pid;
1130                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1131                 
1132                 if (pid != 0) {
1133 #endif
1134                 
1135                 string thisOutputDir = outputDir;
1136                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1137                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1138                 remove(outputFileName.c_str());
1139                 
1140                 
1141                 for (int i = 0; i < distNames.size(); i++) {
1142                         if (m->control_pressed) {  return 0; }
1143                         
1144                         string thisDistFile = distNames[i].begin()->first;
1145                         
1146                         m->appendFiles(thisDistFile, outputFileName);
1147                 }       
1148                         
1149                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1150                         
1151 #ifdef USE_MPI
1152                 }
1153 #endif
1154                                 
1155                 return 0;       
1156                 
1157                 
1158         }
1159         catch(exception& e) {
1160                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1161                 exit(1);
1162         }
1163 }
1164 //**********************************************************************************************************************