]> git.donarmstrong.com Git - mothur.git/blob - clustersplitcommand.cpp
testing 1.22.0
[mothur.git] / clustersplitcommand.cpp
1 /*
2  *  clustersplitcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clustersplitcommand.h"
11 #include "readcluster.h"
12 #include "splitmatrix.h"
13 #include "readphylip.h"
14 #include "readcolumn.h"
15 #include "readmatrix.hpp"
16 #include "inputdata.h"
17
18
19 //**********************************************************************************************************************
20 vector<string> ClusterSplitCommand::setParameters(){    
21         try {
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel);
28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
33                 CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff);
34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
39                         
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ClusterSplitCommand::getHelpString(){    
51         try {
52                 string helpString = "";
53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n";
64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
70 #ifdef USE_MPI
71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
72 #endif
73                 helpString += "The cluster.split command should be in the following format: \n";
74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";       
76                 return helpString;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 ClusterSplitCommand::ClusterSplitCommand(){     
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["list"] = tempOutNames;
90                 outputTypes["rabund"] = tempOutNames;
91                 outputTypes["sabund"] = tempOutNames;
92                 outputTypes["column"] = tempOutNames;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
96                 exit(1);
97         }
98 }
99 //**********************************************************************************************************************
100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
102         try{
103                 abort = false; calledHelp = false;   
104                 format = "";
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string,string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter("cluster.split");
117                 
118                         //check to make sure all parameters are valid for command
119                         map<string,string>::iterator it;
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
122                                         abort = true;
123                                 }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["list"] = tempOutNames;
129                         outputTypes["rabund"] = tempOutNames;
130                         outputTypes["sabund"] = tempOutNames;
131                         outputTypes["column"] = tempOutNames;
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
135                         
136                                 //if the user changes the input directory command factory will send this info to us in the output parameter 
137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
138                         if (inputDir == "not found"){   inputDir = "";          }
139                         else {
140                                 string path;
141                                 it = parameters.find("phylip");
142                                 //user has given a template file
143                                 if(it != parameters.end()){ 
144                                         path = m->hasPath(it->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
147                                 }
148                                 
149                                 it = parameters.find("column");
150                                 //user has given a template file
151                                 if(it != parameters.end()){ 
152                                         path = m->hasPath(it->second);
153                                         //if the user has not given a path then, add inputdir. else leave path alone.
154                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
155                                 }
156                                 
157                                 it = parameters.find("name");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
163                                 }
164                                 
165                                 it = parameters.find("taxonomy");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
171                                 }
172                                 
173                                 it = parameters.find("fasta");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
179                                 }
180                         }
181                         
182                         //check for required parameters
183                         phylipfile = validParameter.validFile(parameters, "phylip", true);
184                         if (phylipfile == "not open") { abort = true; }
185                         else if (phylipfile == "not found") { phylipfile = ""; }        
186                         else {  distfile = phylipfile;  format = "phylip";      m->setPhylipFile(phylipfile); }
187                         
188                         columnfile = validParameter.validFile(parameters, "column", true);
189                         if (columnfile == "not open") { abort = true; } 
190                         else if (columnfile == "not found") { columnfile = ""; }
191                         else {  distfile = columnfile; format = "column";       m->setColumnFile(columnfile); }
192                         
193                         namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not open") { abort = true; }   
195                         else if (namefile == "not found") { namefile = "";  }
196                         else { m->setNameFile(namefile); }
197                         
198                         fastafile = validParameter.validFile(parameters, "fasta", true);
199                         if (fastafile == "not open") { abort = true; }  
200                         else if (fastafile == "not found") { fastafile = ""; }
201                         else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
202                         
203                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
204                         if (taxFile == "not open") { abort = true; }    
205                         else if (taxFile == "not found") { taxFile = ""; }
206                         else {  m->setTaxonomyFile(taxFile); }
207                         
208                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { 
209                                 //is there are current file available for either of these?
210                                 //give priority to column, then phylip, then fasta
211                                 columnfile = m->getColumnFile(); 
212                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
213                                 else { 
214                                         phylipfile = m->getPhylipFile(); 
215                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
216                                         else { 
217                                                 fastafile = m->getFastaFile(); 
218                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
219                                                 else { 
220                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); 
221                                                         abort = true; 
222                                                 }
223                                         }
224                                 }
225                         }
226                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
227                 
228                         if (columnfile != "") {
229                                 if (namefile == "") { 
230                                         namefile = m->getNameFile(); 
231                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
232                                         else { 
233                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
234                                                 abort = true; 
235                                         }       
236                                 }
237                         }
238                         
239                         if (fastafile != "") {
240                                 if (taxFile == "") { 
241                                         taxFile = m->getTaxonomyFile(); 
242                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
243                                         else { 
244                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
245                                                 abort = true; 
246                                         }       
247                                 }
248                                 
249                                 if (namefile == "") { 
250                                         namefile = m->getNameFile(); 
251                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
252                                         else { 
253                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
254                                                 abort = true; 
255                                         }       
256                                 }
257                         }
258                                         
259                         //check for optional parameter and set defaults
260                         // ...at some point should added some additional type checking...
261                         //get user cutoff and precision or use defaults
262                         string temp;
263                         temp = validParameter.validFile(parameters, "precision", false);
264                         if (temp == "not found") { temp = "100"; }
265                         //saves precision legnth for formatting below
266                         length = temp.length();
267                         convert(temp, precision); 
268                         
269                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
270                         hard = m->isTrue(temp);
271                         
272                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
273                         large = m->isTrue(temp);
274                         
275                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
276                         m->setProcessors(temp);
277                         convert(temp, processors);
278                         
279                         temp = validParameter.validFile(parameters, "splitmethod", false);      
280                         if (splitmethod != "fasta") {
281                                 if (temp == "not found")  { splitmethod = "distance"; }
282                                 else {  splitmethod = temp; }
283                         }
284                         
285                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
286                         convert(temp, cutoff); 
287                         cutoff += (5 / (precision * 10.0));  
288                         
289                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "3"; }
290                         convert(temp, taxLevelCutoff); 
291                         
292                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
293                         
294                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
295                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
296                         
297                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
298                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
299                         
300                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
301
302                         showabund = validParameter.validFile(parameters, "showabund", false);
303                         if (showabund == "not found") { showabund = "T"; }
304
305                         timing = validParameter.validFile(parameters, "timing", false);
306                         if (timing == "not found") { timing = "F"; }
307                         
308                 }
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
312                 exit(1);
313         }
314 }
315
316 //**********************************************************************************************************************
317
318 int ClusterSplitCommand::execute(){
319         try {
320         
321                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
322                 
323                 time_t estart;
324                 vector<string> listFileNames;
325                 set<string> labels;
326                 string singletonName = "";
327                 double saveCutoff = cutoff;
328
329                 //****************** file prep work ******************************//
330                 #ifdef USE_MPI
331                         int pid;
332                         int tag = 2001;
333                         MPI_Status status; 
334                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
335                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
336                         
337                         if (pid == 0) { //only process 0 converts and splits
338                         
339                 #endif
340                 
341                 //if user gave a phylip file convert to column file
342                 if (format == "phylip") {
343                         estart = time(NULL);
344                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
345                         
346                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
347                         
348                         NameAssignment* nameMap = NULL;
349                         convert->setFormat("phylip");
350                         convert->read(nameMap);
351                         
352                         if (m->control_pressed) {  delete convert;  return 0;  }
353                         
354                         distfile = convert->getOutputFile();
355                 
356                         //if no names file given with phylip file, create it
357                         ListVector* listToMakeNameFile =  convert->getListVector();
358                         if (namefile == "") {  //you need to make a namefile for split matrix
359                                 ofstream out;
360                                 namefile = phylipfile + ".names";
361                                 m->openOutputFile(namefile, out);
362                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
363                                         string bin = listToMakeNameFile->get(i);
364                                         out << bin << '\t' << bin << endl;
365                                 }
366                                 out.close();
367                         }
368                         delete listToMakeNameFile;
369                         delete convert;
370                         
371                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
372                 }
373                 if (m->control_pressed) { return 0; }
374                 
375                 estart = time(NULL);
376                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
377                 
378                 //split matrix into non-overlapping groups
379                 SplitMatrix* split;
380                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
381                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
382                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
383                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
384                 
385                 split->split();
386                 
387                 if (m->control_pressed) { delete split; return 0; }
388                 
389                 singletonName = split->getSingletonNames();
390                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
391                 delete split;
392                 
393                 //output a merged distance file
394                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
395                         
396                                 
397                 if (m->control_pressed) { return 0; }
398                 
399                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
400                 estart = time(NULL);
401                 
402                 //****************** break up files between processes and cluster each file set ******************************//
403         #ifdef USE_MPI
404                         ////you are process 0 from above////
405                         
406                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...                           
407                         dividedNames.resize(processors);
408                                         
409                         //for each file group figure out which process will complete it
410                         //want to divide the load intelligently so the big files are spread between processes
411                         for (int i = 0; i < distName.size(); i++) { 
412                                 int processToAssign = (i+1) % processors; 
413                                 if (processToAssign == 0) { processToAssign = processors; }
414                                                 
415                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
416                         }
417                                         
418                         //not lets reverse the order of ever other process, so we balance big files running with little ones
419                         for (int i = 0; i < processors; i++) {
420                                 int remainder = ((i+1) % processors);
421                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
422                         }
423                         
424                         
425                         //send each child the list of files it needs to process
426                         for(int i = 1; i < processors; i++) { 
427                                 //send number of file pairs
428                                 int num = dividedNames[i].size();
429                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
430                                 
431                                 for (int j = 0; j < num; j++) { //send filenames to process i
432                                         char tempDistFileName[1024];
433                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
434                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
435                                         
436                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
437                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
438                                         
439                                         char tempNameFileName[1024];
440                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
441                                         int lengthName = (dividedNames[i][j].begin()->second).length();
442
443                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
444                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
445                                 }
446                         }
447                         
448                         //process your share
449                         listFileNames = cluster(dividedNames[0], labels);
450                         
451                         //receive the other processes info
452                         for(int i = 1; i < processors; i++) { 
453                                 int num = dividedNames[i].size();
454                                 
455                                 double tempCutoff;
456                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
457                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
458                                 
459                                 //send list filenames to root process
460                                 for (int j = 0; j < num; j++) {  
461                                         int lengthList = 0;
462                                         char tempListFileName[1024];
463                                 
464                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
465                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
466                                 
467                                         string myListFileName = tempListFileName;
468                                         myListFileName = myListFileName.substr(0, lengthList);
469                                         
470                                         listFileNames.push_back(myListFileName);
471                                 }
472                                 
473                                 //send Labels to root process
474                                 int numLabels = 0;
475                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
476                                 
477                                 for (int j = 0; j < numLabels; j++) {  
478                                         int lengthLabel = 0;
479                                         char tempLabel[100];
480                                 
481                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
482                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
483                                 
484                                         string myLabel = tempLabel;
485                                         myLabel = myLabel.substr(0, lengthLabel);
486                         
487                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
488                                 }
489                         }
490                         
491                 }else { //you are a child process
492                         vector < map<string, string> >  myNames;
493                         
494                         //recieve the files you need to process
495                         //receive number of file pairs
496                         int num = 0;
497                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
498                         
499                         myNames.resize(num);
500         
501                         for (int j = 0; j < num; j++) { //receive filenames to process 
502                                 int lengthDist = 0;
503                                 char tempDistFileName[1024];
504                                 
505                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
506                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
507                                 
508                                 string myDistFileName = tempDistFileName;
509                                 myDistFileName = myDistFileName.substr(0, lengthDist);
510                         
511                                 int lengthName = 0;
512                                 char tempNameFileName[1024];
513                                 
514                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
515                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
516                                 
517                                 string myNameFileName = tempNameFileName;
518                                 myNameFileName = myNameFileName.substr(0, lengthName);
519                                 
520                                 //save file name
521                                 myNames[j][myDistFileName] = myNameFileName;
522                         }
523         
524                         //process them
525                         listFileNames = cluster(myNames, labels);
526                         
527                         //send cutoff
528                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
529                         
530                         //send list filenames to root process
531                         for (int j = 0; j < num; j++) {  
532                                 char tempListFileName[1024];
533                                 strcpy(tempListFileName, listFileNames[j].c_str());
534                                 int lengthList = listFileNames[j].length();
535                                         
536                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
537                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
538                         }
539                         
540                         //send Labels to root process
541                         int numLabels = labels.size();
542                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
543                         
544                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
545                                 char tempLabel[100];
546                                 strcpy(tempLabel, (*it).c_str());
547                                 int lengthLabel = (*it).length();
548                                         
549                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
550                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
551                         }
552                 }
553                 
554                 //make everyone wait
555                 MPI_Barrier(MPI_COMM_WORLD);
556                 
557         #else
558                 
559                 //sanity check
560                 if (processors > distName.size()) { processors = distName.size(); }
561                 
562                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
563                                 if(processors == 1){
564                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
565                                 }else{
566                                         
567                                         cout << processors << '\t' << distName.size() << endl;
568                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
569                                         dividedNames.resize(processors);
570                                         
571                                         //for each file group figure out which process will complete it
572                                         //want to divide the load intelligently so the big files are spread between processes
573                                         for (int i = 0; i < distName.size(); i++) { 
574                                                 cout << i << endl;
575                                                 int processToAssign = (i+1) % processors; 
576                                                 if (processToAssign == 0) { processToAssign = processors; }
577                                                 
578                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
579                                         }
580                                         
581                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
582                                         for (int i = 0; i < processors; i++) {
583                                                 cout << i << endl;
584                                                 int remainder = ((i+1) % processors);
585                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
586                                         }
587                                         
588                                         createProcesses(dividedNames);
589                                                         
590                                         if (m->control_pressed) { return 0; }
591
592                                         //get list of list file names from each process
593                                         for(int i=0;i<processors;i++){
594                                                 string filename = toString(processIDS[i]) + ".temp";
595                                                 ifstream in;
596                                                 m->openInputFile(filename, in);
597                                                 
598                                                 in >> tag; m->gobble(in);
599                                                 
600                                                 while(!in.eof()) {
601                                                         string tempName;
602                                                         in >> tempName; m->gobble(in);
603                                                         listFileNames.push_back(tempName);
604                                                 }
605                                                 in.close();
606                                                 m->mothurRemove((toString(processIDS[i]) + ".temp"));
607                                                 
608                                                 //get labels
609                                                 filename = toString(processIDS[i]) + ".temp.labels";
610                                                 ifstream in2;
611                                                 m->openInputFile(filename, in2);
612                                                 
613                                                 float tempCutoff;
614                                                 in2 >> tempCutoff; m->gobble(in2);
615                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
616                                                 
617                                                 while(!in2.eof()) {
618                                                         string tempName;
619                                                         in2 >> tempName; m->gobble(in2);
620                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
621                                                 }
622                                                 in2.close();
623                                                 m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
624                                         }
625                                 }
626                 #else
627                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
628                 #endif
629         #endif  
630                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } return 0; }
631                 
632                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
633                 
634                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
635                 
636                 //****************** merge list file and create rabund and sabund files ******************************//
637                 estart = time(NULL);
638                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
639                 
640                 #ifdef USE_MPI
641                         if (pid == 0) { //only process 0 merges
642                 #endif
643
644                 ListVector* listSingle;
645                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
646                 
647                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
648                 
649                 mergeLists(listFileNames, labelBins, listSingle);
650
651                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
652                 
653                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
654                 
655                 //set list file as new current listfile
656                 string current = "";
657                 itTypes = outputTypes.find("list");
658                 if (itTypes != outputTypes.end()) {
659                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
660                 }
661                 
662                 //set rabund file as new current rabundfile
663                 itTypes = outputTypes.find("rabund");
664                 if (itTypes != outputTypes.end()) {
665                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
666                 }
667                 
668                 //set sabund file as new current sabundfile
669                 itTypes = outputTypes.find("sabund");
670                 if (itTypes != outputTypes.end()) {
671                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
672                 }
673                                 
674                 m->mothurOutEndLine();
675                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
676                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
677                 m->mothurOutEndLine();
678                 
679                 #ifdef USE_MPI
680                         } //only process 0 merges
681                         
682                         //make everyone wait
683                         MPI_Barrier(MPI_COMM_WORLD);
684                 #endif
685
686                 return 0;
687         }
688         catch(exception& e) {
689                 m->errorOut(e, "ClusterSplitCommand", "execute");
690                 exit(1);
691         }
692 }
693 //**********************************************************************************************************************
694 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
695         try {
696                                 
697                 map<float, int> labelBin;
698                 vector<float> orderFloat;
699                 int numSingleBins;
700                 
701                 //read in singletons
702                 if (singleton != "none") {
703                         ifstream in;
704                         m->openInputFile(singleton, in);
705                                 
706                         string firstCol, secondCol;
707                         listSingle = new ListVector();
708                         while (!in.eof()) {
709                                 in >> firstCol >> secondCol; m->gobble(in);
710                                 listSingle->push_back(secondCol);
711                         }
712                         in.close();
713                         m->mothurRemove(singleton);
714                         
715                         numSingleBins = listSingle->getNumBins();
716                 }else{  listSingle = NULL; numSingleBins = 0;  }
717                 
718                 //go through users set and make them floats so we can sort them 
719                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
720                         float temp = -10.0;
721
722                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
723                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
724                         
725                         if (temp <= cutoff) {
726                                 orderFloat.push_back(temp);
727                                 labelBin[temp] = numSingleBins; //initialize numbins 
728                         }
729                 }
730         
731                 //sort order
732                 sort(orderFloat.begin(), orderFloat.end());
733                 userLabels.clear();
734                         
735                 //get the list info from each file
736                 for (int k = 0; k < listNames.size(); k++) {
737         
738                         if (m->control_pressed) {  
739                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; m->mothurRemove(singleton);  }
740                                 for (int i = 0; i < listNames.size(); i++) {   m->mothurRemove(listNames[i]);  }
741                                 return labelBin;
742                         }
743                         
744                         InputData* input = new InputData(listNames[k], "list");
745                         ListVector* list = input->getListVector();
746                         string lastLabel = list->getLabel();
747                         
748                         string filledInList = listNames[k] + "filledInTemp";
749                         ofstream outFilled;
750                         m->openOutputFile(filledInList, outFilled);
751         
752                         //for each label needed
753                         for(int l = 0; l < orderFloat.size(); l++){
754                         
755                                 string thisLabel;
756                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
757                                 else { thisLabel = toString(orderFloat[l],  length-1);  } 
758
759                                 //this file has reached the end
760                                 if (list == NULL) { 
761                                         list = input->getListVector(lastLabel, true); 
762                                 }else{  //do you have the distance, or do you need to fill in
763                                                 
764                                         float labelFloat;
765                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
766                                         else { convert(list->getLabel(), labelFloat); }
767
768                                         //check for missing labels
769                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
770                                                 //if its bigger get last label, otherwise keep it
771                                                 delete list;
772                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
773                                         }
774                                         lastLabel = list->getLabel();
775                                 }
776                                 
777                                 //print to new file
778                                 list->setLabel(thisLabel);
779                                 list->print(outFilled);
780                 
781                                 //update labelBin
782                                 labelBin[orderFloat[l]] += list->getNumBins();
783                                                                         
784                                 delete list;
785                                                                         
786                                 list = input->getListVector();
787                         }
788                         
789                         if (list != NULL) { delete list; }
790                         delete input;
791                         
792                         outFilled.close();
793                         m->mothurRemove(listNames[k]);
794                         rename(filledInList.c_str(), listNames[k].c_str());
795                 }
796                 
797                 return labelBin;
798         }
799         catch(exception& e) {
800                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
801                 exit(1);
802         }
803 }
804 //**********************************************************************************************************************
805 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
806         try {
807                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
808                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
809                 
810                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
811                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
812                 m->openOutputFile(fileroot+ tag + ".list",              outList);
813                                 
814                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
815                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
816                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
817                 
818                 map<float, int>::iterator itLabel;
819
820                 //for each label needed
821                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
822                         
823                         string thisLabel;
824                         if (itLabel->first == -1) { thisLabel = "unique"; }
825                         else { thisLabel = toString(itLabel->first,  length-1);  } 
826                         
827                         outList << thisLabel << '\t' << itLabel->second << '\t';
828
829                         RAbundVector* rabund = new RAbundVector();
830                         rabund->setLabel(thisLabel);
831
832                         //add in singletons
833                         if (listSingle != NULL) {
834                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
835                                         outList << listSingle->get(j) << '\t';
836                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
837                                 }
838                         }
839                         
840                         //get the list info from each file
841                         for (int k = 0; k < listNames.size(); k++) {
842         
843                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]);  } delete rabund; return 0; }
844                                 
845                                 InputData* input = new InputData(listNames[k], "list");
846                                 ListVector* list = input->getListVector(thisLabel);
847                                 
848                                 //this file has reached the end
849                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
850                                 else {          
851                                         for (int j = 0; j < list->getNumBins(); j++) {
852                                                 outList << list->get(j) << '\t';
853                                                 rabund->push_back(m->getNumNames(list->get(j)));
854                                         }
855                                         delete list;
856                                 }
857                                 delete input;
858                         }
859                         
860                         SAbundVector sabund = rabund->getSAbundVector();
861                         
862                         sabund.print(outSabund);
863                         rabund->print(outRabund);
864                         outList << endl;
865                         
866                         delete rabund;
867                 }
868                 
869                 outList.close();
870                 outRabund.close();
871                 outSabund.close();
872                 
873                 if (listSingle != NULL) { delete listSingle;  }
874                 
875                 for (int i = 0; i < listNames.size(); i++) {  m->mothurRemove(listNames[i]);  }
876                 
877                 return 0;
878         }
879         catch(exception& e) {
880                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
881                 exit(1);
882         }
883 }
884
885 //**********************************************************************************************************************
886
887 void ClusterSplitCommand::printData(ListVector* oldList){
888         try {
889                 string label = oldList->getLabel();
890                 RAbundVector oldRAbund = oldList->getRAbundVector();
891                 
892                 oldRAbund.setLabel(label);
893                 if (m->isTrue(showabund)) {
894                         oldRAbund.getSAbundVector().print(cout);
895                 }
896                 oldRAbund.print(outRabund);
897                 oldRAbund.getSAbundVector().print(outSabund);
898         
899                 oldList->print(outList);
900         }
901         catch(exception& e) {
902                 m->errorOut(e, "ClusterSplitCommand", "printData");
903                 exit(1);
904         }
905 }
906 //**********************************************************************************************************************
907 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
908         try {
909         
910         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
911                 int process = 0;
912                 int exitCommand = 1;
913                 processIDS.clear();
914                 
915                 //loop through and create all the processes you want
916                 while (process != processors) {
917                         int pid = fork();
918                         
919                         if (pid > 0) {
920                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
921                                 process++;
922                         }else if (pid == 0){
923                                 set<string> labels;
924                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
925                                 
926                                 //write out names to file
927                                 string filename = toString(getpid()) + ".temp";
928                                 ofstream out;
929                                 m->openOutputFile(filename, out);
930                                 out << tag << endl;
931                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
932                                 out.close();
933                                 
934                                 //print out labels
935                                 ofstream outLabels;
936                                 filename = toString(getpid()) + ".temp.labels";
937                                 m->openOutputFile(filename, outLabels);
938                                 
939                                 outLabels << cutoff << endl;
940                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
941                                         outLabels << (*it) << endl;
942                                 }
943                                 outLabels.close();
944
945                                 exit(0);
946                         }else { 
947                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
948                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
949                                 exit(0);
950                         }
951                 }
952                 
953                 //force parent to wait until all the processes are done
954                 for (int i=0;i<processors;i++) { 
955                         int temp = processIDS[i];
956                         wait(&temp);
957                 }
958                 
959                 return exitCommand;
960         #endif          
961         
962         }
963         catch(exception& e) {
964                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
965                 exit(1);
966         }
967 }
968 //**********************************************************************************************************************
969
970 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
971         try {
972                 Cluster* cluster;
973                 SparseMatrix* matrix;
974                 ListVector* list;
975                 ListVector oldList;
976                 RAbundVector* rabund;
977                 
978                 vector<string> listFileNames;
979                 
980                 double smallestCutoff = cutoff;
981                 
982                 //cluster each distance file
983                 for (int i = 0; i < distNames.size(); i++) {
984                         if (m->control_pressed) { return listFileNames; }
985                         
986                         string thisNamefile = distNames[i].begin()->second;
987                         string thisDistFile = distNames[i].begin()->first;
988                                                 
989                         #ifdef USE_MPI
990                                 int pid;
991                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
992                                 
993                                 //output your files too
994                                 if (pid != 0) {
995                                         cout << endl << "Reading " << thisDistFile << endl;
996                                 }
997                         #endif
998                         
999                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
1000                         
1001                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
1002                         read->setCutoff(cutoff);
1003
1004                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
1005                         nameMap->readMap();
1006                         read->read(nameMap);
1007                         
1008                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
1009                         
1010                         list = read->getListVector();
1011                         oldList = *list;
1012                         matrix = read->getMatrix();
1013                         
1014                         delete read; 
1015                         delete nameMap; 
1016                         
1017                         
1018                         #ifdef USE_MPI
1019                                 //output your files too
1020                                 if (pid != 0) {
1021                                         cout << endl << "Clustering " << thisDistFile << endl;
1022                                 }
1023                         #endif
1024                         
1025                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1026                 
1027                         rabund = new RAbundVector(list->getRAbundVector());
1028                         
1029                         //create cluster
1030                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1031                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1032                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1033                         tag = cluster->getTag();
1034                 
1035                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1036                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1037                         
1038                         ofstream listFile;
1039                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1040                 
1041                         listFileNames.push_back(fileroot+ tag + ".list");
1042                                 
1043                         float previousDist = 0.00000;
1044                         float rndPreviousDist = 0.00000;
1045                         
1046                         oldList = *list;
1047
1048                         print_start = true;
1049                         start = time(NULL);
1050                         double saveCutoff = cutoff;
1051                 
1052                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1053                 
1054                                 if (m->control_pressed) { //clean up
1055                                         delete matrix; delete list;     delete cluster; delete rabund;
1056                                         listFile.close();
1057                                         for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
1058                                         listFileNames.clear(); return listFileNames;
1059                                 }
1060                 
1061                                 cluster->update(saveCutoff);
1062         
1063                                 float dist = matrix->getSmallDist();
1064                                 float rndDist;
1065                                 if (hard) {
1066                                         rndDist = m->ceilDist(dist, precision); 
1067                                 }else{
1068                                         rndDist = m->roundDist(dist, precision); 
1069                                 }
1070
1071                                 if(previousDist <= 0.0000 && dist != previousDist){
1072                                         oldList.setLabel("unique");
1073                                         oldList.print(listFile);
1074                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1075                                 }
1076                                 else if(rndDist != rndPreviousDist){
1077                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1078                                         oldList.print(listFile);
1079                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1080                                 }
1081                 
1082                                 previousDist = dist;
1083                                 rndPreviousDist = rndDist;
1084                                 oldList = *list;
1085                         }
1086
1087                 
1088                         if(previousDist <= 0.0000){
1089                                 oldList.setLabel("unique");
1090                                 oldList.print(listFile);
1091                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1092                         }
1093                         else if(rndPreviousDist<cutoff){
1094                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1095                                 oldList.print(listFile);
1096                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1097                         }
1098         
1099                         delete matrix; delete list;     delete cluster; delete rabund; 
1100                         listFile.close();
1101                         
1102                         if (m->control_pressed) { //clean up
1103                                 for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
1104                                 listFileNames.clear(); return listFileNames;
1105                         }
1106                         
1107                         m->mothurRemove(thisDistFile);
1108                         m->mothurRemove(thisNamefile);
1109                         
1110                         if (saveCutoff != cutoff) { 
1111                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1112                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1113                         
1114                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
1115                         }
1116                         
1117                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1118                 }
1119                 
1120                 cutoff = smallestCutoff;
1121                                         
1122                 return listFileNames;
1123         
1124         }
1125         catch(exception& e) {
1126                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1127                 exit(1);
1128         }
1129
1130
1131 }
1132 //**********************************************************************************************************************
1133
1134 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1135         try{
1136                 
1137 #ifdef USE_MPI
1138                 int pid;
1139                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1140                 
1141                 if (pid != 0) {
1142 #endif
1143                 
1144                 string thisOutputDir = outputDir;
1145                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1146                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1147                 m->mothurRemove(outputFileName);
1148                 
1149                 
1150                 for (int i = 0; i < distNames.size(); i++) {
1151                         if (m->control_pressed) {  return 0; }
1152                         
1153                         string thisDistFile = distNames[i].begin()->first;
1154                         
1155                         m->appendFiles(thisDistFile, outputFileName);
1156                 }       
1157                         
1158                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1159                         
1160 #ifdef USE_MPI
1161                 }
1162 #endif
1163                                 
1164                 return 0;       
1165                 
1166                 
1167         }
1168         catch(exception& e) {
1169                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1170                 exit(1);
1171         }
1172 }
1173 //**********************************************************************************************************************