]> git.donarmstrong.com Git - mothur.git/blob - clustersplitcommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / clustersplitcommand.cpp
1 /*
2  *  clustersplitcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "clustersplitcommand.h"
11 #include "readcluster.h"
12 #include "splitmatrix.h"
13 #include "readphylip.h"
14 #include "readcolumn.h"
15 #include "readmatrix.hpp"
16 #include "inputdata.h"
17
18
19 //**********************************************************************************************************************
20 vector<string> ClusterSplitCommand::setParameters(){    
21         try {
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
33                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
39                         
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ClusterSplitCommand::getHelpString(){    
51         try {
52                 string helpString = "";
53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
70 #ifdef USE_MPI
71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
72 #endif
73                 helpString += "The cluster.split command should be in the following format: \n";
74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";       
76                 return helpString;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 ClusterSplitCommand::ClusterSplitCommand(){     
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["list"] = tempOutNames;
90                 outputTypes["rabund"] = tempOutNames;
91                 outputTypes["sabund"] = tempOutNames;
92                 outputTypes["column"] = tempOutNames;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
96                 exit(1);
97         }
98 }
99 //**********************************************************************************************************************
100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
102         try{
103                 abort = false; calledHelp = false;   
104                 format = "";
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string,string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter("cluster.split");
117                 
118                         //check to make sure all parameters are valid for command
119                         map<string,string>::iterator it;
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
122                                         abort = true;
123                                 }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["list"] = tempOutNames;
129                         outputTypes["rabund"] = tempOutNames;
130                         outputTypes["sabund"] = tempOutNames;
131                         outputTypes["column"] = tempOutNames;
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
135                         
136                                 //if the user changes the input directory command factory will send this info to us in the output parameter 
137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
138                         if (inputDir == "not found"){   inputDir = "";          }
139                         else {
140                                 string path;
141                                 it = parameters.find("phylip");
142                                 //user has given a template file
143                                 if(it != parameters.end()){ 
144                                         path = m->hasPath(it->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
147                                 }
148                                 
149                                 it = parameters.find("column");
150                                 //user has given a template file
151                                 if(it != parameters.end()){ 
152                                         path = m->hasPath(it->second);
153                                         //if the user has not given a path then, add inputdir. else leave path alone.
154                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
155                                 }
156                                 
157                                 it = parameters.find("name");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
163                                 }
164                                 
165                                 it = parameters.find("taxonomy");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
171                                 }
172                                 
173                                 it = parameters.find("fasta");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
179                                 }
180                         }
181                         
182                         //check for required parameters
183                         phylipfile = validParameter.validFile(parameters, "phylip", true);
184                         if (phylipfile == "not open") { abort = true; }
185                         else if (phylipfile == "not found") { phylipfile = ""; }        
186                         else {  distfile = phylipfile;  format = "phylip";      m->setPhylipFile(phylipfile); }
187                         
188                         columnfile = validParameter.validFile(parameters, "column", true);
189                         if (columnfile == "not open") { abort = true; } 
190                         else if (columnfile == "not found") { columnfile = ""; }
191                         else {  distfile = columnfile; format = "column";       m->setColumnFile(columnfile); }
192                         
193                         namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not open") { abort = true; }   
195                         else if (namefile == "not found") { namefile = "";  }
196                         else { m->setNameFile(namefile); }
197                         
198                         fastafile = validParameter.validFile(parameters, "fasta", true);
199                         if (fastafile == "not open") { abort = true; }  
200                         else if (fastafile == "not found") { fastafile = ""; }
201                         else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
202                         
203                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
204                         if (taxFile == "not open") { abort = true; }    
205                         else if (taxFile == "not found") { taxFile = ""; }
206                         else {  m->setTaxonomyFile(taxFile); }
207                         
208                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { 
209                                 //is there are current file available for either of these?
210                                 //give priority to column, then phylip, then fasta
211                                 columnfile = m->getColumnFile(); 
212                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
213                                 else { 
214                                         phylipfile = m->getPhylipFile(); 
215                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
216                                         else { 
217                                                 fastafile = m->getFastaFile(); 
218                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
219                                                 else { 
220                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); 
221                                                         abort = true; 
222                                                 }
223                                         }
224                                 }
225                         }
226                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
227                 
228                         if (columnfile != "") {
229                                 if (namefile == "") { 
230                                         namefile = m->getNameFile(); 
231                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
232                                         else { 
233                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
234                                                 abort = true; 
235                                         }       
236                                 }
237                         }
238                         
239                         if (fastafile != "") {
240                                 if (taxFile == "") { 
241                                         taxFile = m->getTaxonomyFile(); 
242                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
243                                         else { 
244                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
245                                                 abort = true; 
246                                         }       
247                                 }
248                                 
249                                 if (namefile == "") { 
250                                         namefile = m->getNameFile(); 
251                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
252                                         else { 
253                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine(); 
254                                                 abort = true; 
255                                         }       
256                                 }
257                         }
258                                         
259                         //check for optional parameter and set defaults
260                         // ...at some point should added some additional type checking...
261                         //get user cutoff and precision or use defaults
262                         string temp;
263                         temp = validParameter.validFile(parameters, "precision", false);
264                         if (temp == "not found") { temp = "100"; }
265                         //saves precision legnth for formatting below
266                         length = temp.length();
267                         convert(temp, precision); 
268                         
269                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
270                         hard = m->isTrue(temp);
271                         
272                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
273                         large = m->isTrue(temp);
274                         
275                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
276                         m->setProcessors(temp);
277                         convert(temp, processors);
278                         
279                         temp = validParameter.validFile(parameters, "splitmethod", false);      
280                         if (splitmethod != "fasta") {
281                                 if (temp == "not found")  { splitmethod = "distance"; }
282                                 else {  splitmethod = temp; }
283                         }
284                         
285                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
286                         convert(temp, cutoff); 
287                         cutoff += (5 / (precision * 10.0));  
288                         
289                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
290                         convert(temp, taxLevelCutoff); 
291                         
292                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
293                         
294                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
295                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
296                         
297                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
298                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
299                         
300                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
301
302                         showabund = validParameter.validFile(parameters, "showabund", false);
303                         if (showabund == "not found") { showabund = "T"; }
304
305                         timing = validParameter.validFile(parameters, "timing", false);
306                         if (timing == "not found") { timing = "F"; }
307                         
308                 }
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
312                 exit(1);
313         }
314 }
315
316 //**********************************************************************************************************************
317
318 int ClusterSplitCommand::execute(){
319         try {
320         
321                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
322                 
323                 time_t estart;
324                 vector<string> listFileNames;
325                 set<string> labels;
326                 string singletonName = "";
327                 double saveCutoff = cutoff;
328
329                 //****************** file prep work ******************************//
330                 #ifdef USE_MPI
331                         int pid;
332                         int tag = 2001;
333                         MPI_Status status; 
334                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
335                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
336                         
337                         if (pid == 0) { //only process 0 converts and splits
338                         
339                 #endif
340                 
341                 //if user gave a phylip file convert to column file
342                 if (format == "phylip") {
343                         estart = time(NULL);
344                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
345                         
346                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
347                         
348                         NameAssignment* nameMap = NULL;
349                         convert->setFormat("phylip");
350                         convert->read(nameMap);
351                         
352                         if (m->control_pressed) {  delete convert;  return 0;  }
353                         
354                         distfile = convert->getOutputFile();
355                 
356                         //if no names file given with phylip file, create it
357                         ListVector* listToMakeNameFile =  convert->getListVector();
358                         if (namefile == "") {  //you need to make a namefile for split matrix
359                                 ofstream out;
360                                 namefile = phylipfile + ".names";
361                                 m->openOutputFile(namefile, out);
362                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
363                                         string bin = listToMakeNameFile->get(i);
364                                         out << bin << '\t' << bin << endl;
365                                 }
366                                 out.close();
367                         }
368                         delete listToMakeNameFile;
369                         delete convert;
370                         
371                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
372                 }
373                 if (m->control_pressed) { return 0; }
374                 
375                 estart = time(NULL);
376                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
377                 
378                 //split matrix into non-overlapping groups
379                 SplitMatrix* split;
380                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
381                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
382                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
383                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
384                 
385                 split->split();
386                 
387                 if (m->control_pressed) { delete split; return 0; }
388                 
389                 singletonName = split->getSingletonNames();
390                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
391                 delete split;
392                 
393                 //output a merged distance file
394                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
395                         
396                                 
397                 if (m->control_pressed) { return 0; }
398                 
399                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
400                 estart = time(NULL);
401                 
402                 //****************** break up files between processes and cluster each file set ******************************//
403         #ifdef USE_MPI
404                         ////you are process 0 from above////
405                         
406                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...                           
407                         dividedNames.resize(processors);
408                                         
409                         //for each file group figure out which process will complete it
410                         //want to divide the load intelligently so the big files are spread between processes
411                         for (int i = 0; i < distName.size(); i++) { 
412                                 int processToAssign = (i+1) % processors; 
413                                 if (processToAssign == 0) { processToAssign = processors; }
414                                                 
415                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
416                         }
417                                         
418                         //not lets reverse the order of ever other process, so we balance big files running with little ones
419                         for (int i = 0; i < processors; i++) {
420                                 int remainder = ((i+1) % processors);
421                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
422                         }
423                         
424                         
425                         //send each child the list of files it needs to process
426                         for(int i = 1; i < processors; i++) { 
427                                 //send number of file pairs
428                                 int num = dividedNames[i].size();
429                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
430                                 
431                                 for (int j = 0; j < num; j++) { //send filenames to process i
432                                         char tempDistFileName[1024];
433                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
434                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
435                                         
436                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
437                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
438                                         
439                                         char tempNameFileName[1024];
440                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
441                                         int lengthName = (dividedNames[i][j].begin()->second).length();
442
443                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
444                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
445                                 }
446                         }
447                         
448                         //process your share
449                         listFileNames = cluster(dividedNames[0], labels);
450                         
451                         //receive the other processes info
452                         for(int i = 1; i < processors; i++) { 
453                                 int num = dividedNames[i].size();
454                                 
455                                 double tempCutoff;
456                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
457                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
458                                 
459                                 //send list filenames to root process
460                                 for (int j = 0; j < num; j++) {  
461                                         int lengthList = 0;
462                                         char tempListFileName[1024];
463                                 
464                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
465                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
466                                 
467                                         string myListFileName = tempListFileName;
468                                         myListFileName = myListFileName.substr(0, lengthList);
469                                         
470                                         listFileNames.push_back(myListFileName);
471                                 }
472                                 
473                                 //send Labels to root process
474                                 int numLabels = 0;
475                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
476                                 
477                                 for (int j = 0; j < numLabels; j++) {  
478                                         int lengthLabel = 0;
479                                         char tempLabel[100];
480                                 
481                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
482                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
483                                 
484                                         string myLabel = tempLabel;
485                                         myLabel = myLabel.substr(0, lengthLabel);
486                         
487                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
488                                 }
489                         }
490                         
491                 }else { //you are a child process
492                         vector < map<string, string> >  myNames;
493                         
494                         //recieve the files you need to process
495                         //receive number of file pairs
496                         int num = 0;
497                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
498                         
499                         myNames.resize(num);
500         
501                         for (int j = 0; j < num; j++) { //receive filenames to process 
502                                 int lengthDist = 0;
503                                 char tempDistFileName[1024];
504                                 
505                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
506                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
507                                 
508                                 string myDistFileName = tempDistFileName;
509                                 myDistFileName = myDistFileName.substr(0, lengthDist);
510                         
511                                 int lengthName = 0;
512                                 char tempNameFileName[1024];
513                                 
514                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
515                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
516                                 
517                                 string myNameFileName = tempNameFileName;
518                                 myNameFileName = myNameFileName.substr(0, lengthName);
519                                 
520                                 //save file name
521                                 myNames[j][myDistFileName] = myNameFileName;
522                         }
523         
524                         //process them
525                         listFileNames = cluster(myNames, labels);
526                         
527                         //send cutoff
528                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
529                         
530                         //send list filenames to root process
531                         for (int j = 0; j < num; j++) {  
532                                 char tempListFileName[1024];
533                                 strcpy(tempListFileName, listFileNames[j].c_str());
534                                 int lengthList = listFileNames[j].length();
535                                         
536                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
537                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
538                         }
539                         
540                         //send Labels to root process
541                         int numLabels = labels.size();
542                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
543                         
544                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
545                                 char tempLabel[100];
546                                 strcpy(tempLabel, (*it).c_str());
547                                 int lengthLabel = (*it).length();
548                                         
549                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
550                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
551                         }
552                 }
553                 
554                 //make everyone wait
555                 MPI_Barrier(MPI_COMM_WORLD);
556                 
557         #else
558
559                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
560                                 if(processors == 1){
561                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
562                                 }else{
563                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
564                                         dividedNames.resize(processors);
565                                         
566                                         //for each file group figure out which process will complete it
567                                         //want to divide the load intelligently so the big files are spread between processes
568                                         for (int i = 0; i < distName.size(); i++) { 
569                                                 int processToAssign = (i+1) % processors; 
570                                                 if (processToAssign == 0) { processToAssign = processors; }
571                                                 
572                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
573                                         }
574                                         
575                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
576                                         for (int i = 0; i < processors; i++) {
577                                                 int remainder = ((i+1) % processors);
578                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
579                                         }
580                                         
581                                         createProcesses(dividedNames);
582                                                         
583                                         if (m->control_pressed) { return 0; }
584
585                                         //get list of list file names from each process
586                                         for(int i=0;i<processors;i++){
587                                                 string filename = toString(processIDS[i]) + ".temp";
588                                                 ifstream in;
589                                                 m->openInputFile(filename, in);
590                                                 
591                                                 in >> tag; m->gobble(in);
592                                                 
593                                                 while(!in.eof()) {
594                                                         string tempName;
595                                                         in >> tempName; m->gobble(in);
596                                                         listFileNames.push_back(tempName);
597                                                 }
598                                                 in.close();
599                                                 m->mothurRemove((toString(processIDS[i]) + ".temp"));
600                                                 
601                                                 //get labels
602                                                 filename = toString(processIDS[i]) + ".temp.labels";
603                                                 ifstream in2;
604                                                 m->openInputFile(filename, in2);
605                                                 
606                                                 float tempCutoff;
607                                                 in2 >> tempCutoff; m->gobble(in2);
608                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
609                                                 
610                                                 while(!in2.eof()) {
611                                                         string tempName;
612                                                         in2 >> tempName; m->gobble(in2);
613                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
614                                                 }
615                                                 in2.close();
616                                                 m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
617                                         }
618                                 }
619                 #else
620                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
621                 #endif
622         #endif  
623                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } return 0; }
624                 
625                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
626                 
627                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
628                 
629                 //****************** merge list file and create rabund and sabund files ******************************//
630                 estart = time(NULL);
631                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
632                 
633                 #ifdef USE_MPI
634                         if (pid == 0) { //only process 0 merges
635                 #endif
636
637                 ListVector* listSingle;
638                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
639                 
640                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
641                 
642                 mergeLists(listFileNames, labelBins, listSingle);
643
644                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
645                 
646                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
647                 
648                 //set list file as new current listfile
649                 string current = "";
650                 itTypes = outputTypes.find("list");
651                 if (itTypes != outputTypes.end()) {
652                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
653                 }
654                 
655                 //set rabund file as new current rabundfile
656                 itTypes = outputTypes.find("rabund");
657                 if (itTypes != outputTypes.end()) {
658                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
659                 }
660                 
661                 //set sabund file as new current sabundfile
662                 itTypes = outputTypes.find("sabund");
663                 if (itTypes != outputTypes.end()) {
664                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
665                 }
666                                 
667                 m->mothurOutEndLine();
668                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
669                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
670                 m->mothurOutEndLine();
671                 
672                 #ifdef USE_MPI
673                         } //only process 0 merges
674                         
675                         //make everyone wait
676                         MPI_Barrier(MPI_COMM_WORLD);
677                 #endif
678
679                 return 0;
680         }
681         catch(exception& e) {
682                 m->errorOut(e, "ClusterSplitCommand", "execute");
683                 exit(1);
684         }
685 }
686 //**********************************************************************************************************************
687 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
688         try {
689                                 
690                 map<float, int> labelBin;
691                 vector<float> orderFloat;
692                 int numSingleBins;
693                 
694                 //read in singletons
695                 if (singleton != "none") {
696                         ifstream in;
697                         m->openInputFile(singleton, in);
698                                 
699                         string firstCol, secondCol;
700                         listSingle = new ListVector();
701                         while (!in.eof()) {
702                                 in >> firstCol >> secondCol; m->gobble(in);
703                                 listSingle->push_back(secondCol);
704                         }
705                         in.close();
706                         m->mothurRemove(singleton);
707                         
708                         numSingleBins = listSingle->getNumBins();
709                 }else{  listSingle = NULL; numSingleBins = 0;  }
710                 
711                 //go through users set and make them floats so we can sort them 
712                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
713                         float temp = -10.0;
714
715                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
716                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
717                         
718                         if (temp <= cutoff) {
719                                 orderFloat.push_back(temp);
720                                 labelBin[temp] = numSingleBins; //initialize numbins 
721                         }
722                 }
723         
724                 //sort order
725                 sort(orderFloat.begin(), orderFloat.end());
726                 userLabels.clear();
727                         
728                 //get the list info from each file
729                 for (int k = 0; k < listNames.size(); k++) {
730         
731                         if (m->control_pressed) {  
732                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; m->mothurRemove(singleton);  }
733                                 for (int i = 0; i < listNames.size(); i++) {   m->mothurRemove(listNames[i]);  }
734                                 return labelBin;
735                         }
736                         
737                         InputData* input = new InputData(listNames[k], "list");
738                         ListVector* list = input->getListVector();
739                         string lastLabel = list->getLabel();
740                         
741                         string filledInList = listNames[k] + "filledInTemp";
742                         ofstream outFilled;
743                         m->openOutputFile(filledInList, outFilled);
744         
745                         //for each label needed
746                         for(int l = 0; l < orderFloat.size(); l++){
747                         
748                                 string thisLabel;
749                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
750                                 else { thisLabel = toString(orderFloat[l],  length-1);  } 
751
752                                 //this file has reached the end
753                                 if (list == NULL) { 
754                                         list = input->getListVector(lastLabel, true); 
755                                 }else{  //do you have the distance, or do you need to fill in
756                                                 
757                                         float labelFloat;
758                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
759                                         else { convert(list->getLabel(), labelFloat); }
760
761                                         //check for missing labels
762                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
763                                                 //if its bigger get last label, otherwise keep it
764                                                 delete list;
765                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
766                                         }
767                                         lastLabel = list->getLabel();
768                                 }
769                                 
770                                 //print to new file
771                                 list->setLabel(thisLabel);
772                                 list->print(outFilled);
773                 
774                                 //update labelBin
775                                 labelBin[orderFloat[l]] += list->getNumBins();
776                                                                         
777                                 delete list;
778                                                                         
779                                 list = input->getListVector();
780                         }
781                         
782                         if (list != NULL) { delete list; }
783                         delete input;
784                         
785                         outFilled.close();
786                         m->mothurRemove(listNames[k]);
787                         rename(filledInList.c_str(), listNames[k].c_str());
788                 }
789                 
790                 return labelBin;
791         }
792         catch(exception& e) {
793                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
794                 exit(1);
795         }
796 }
797 //**********************************************************************************************************************
798 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
799         try {
800                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
801                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
802                 
803                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
804                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
805                 m->openOutputFile(fileroot+ tag + ".list",              outList);
806                                 
807                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
808                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
809                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
810                 
811                 map<float, int>::iterator itLabel;
812
813                 //for each label needed
814                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
815                         
816                         string thisLabel;
817                         if (itLabel->first == -1) { thisLabel = "unique"; }
818                         else { thisLabel = toString(itLabel->first,  length-1);  } 
819                         
820                         outList << thisLabel << '\t' << itLabel->second << '\t';
821
822                         RAbundVector* rabund = new RAbundVector();
823                         rabund->setLabel(thisLabel);
824
825                         //add in singletons
826                         if (listSingle != NULL) {
827                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
828                                         outList << listSingle->get(j) << '\t';
829                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
830                                 }
831                         }
832                         
833                         //get the list info from each file
834                         for (int k = 0; k < listNames.size(); k++) {
835         
836                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]);  } delete rabund; return 0; }
837                                 
838                                 InputData* input = new InputData(listNames[k], "list");
839                                 ListVector* list = input->getListVector(thisLabel);
840                                 
841                                 //this file has reached the end
842                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
843                                 else {          
844                                         for (int j = 0; j < list->getNumBins(); j++) {
845                                                 outList << list->get(j) << '\t';
846                                                 rabund->push_back(m->getNumNames(list->get(j)));
847                                         }
848                                         delete list;
849                                 }
850                                 delete input;
851                         }
852                         
853                         SAbundVector sabund = rabund->getSAbundVector();
854                         
855                         sabund.print(outSabund);
856                         rabund->print(outRabund);
857                         outList << endl;
858                         
859                         delete rabund;
860                 }
861                 
862                 outList.close();
863                 outRabund.close();
864                 outSabund.close();
865                 
866                 if (listSingle != NULL) { delete listSingle;  }
867                 
868                 for (int i = 0; i < listNames.size(); i++) {  m->mothurRemove(listNames[i]);  }
869                 
870                 return 0;
871         }
872         catch(exception& e) {
873                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
874                 exit(1);
875         }
876 }
877
878 //**********************************************************************************************************************
879
880 void ClusterSplitCommand::printData(ListVector* oldList){
881         try {
882                 string label = oldList->getLabel();
883                 RAbundVector oldRAbund = oldList->getRAbundVector();
884                 
885                 oldRAbund.setLabel(label);
886                 if (m->isTrue(showabund)) {
887                         oldRAbund.getSAbundVector().print(cout);
888                 }
889                 oldRAbund.print(outRabund);
890                 oldRAbund.getSAbundVector().print(outSabund);
891         
892                 oldList->print(outList);
893         }
894         catch(exception& e) {
895                 m->errorOut(e, "ClusterSplitCommand", "printData");
896                 exit(1);
897         }
898 }
899 //**********************************************************************************************************************
900 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
901         try {
902         
903         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
904                 int process = 0;
905                 int exitCommand = 1;
906                 processIDS.clear();
907                 
908                 //loop through and create all the processes you want
909                 while (process != processors) {
910                         int pid = fork();
911                         
912                         if (pid > 0) {
913                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
914                                 process++;
915                         }else if (pid == 0){
916                                 set<string> labels;
917                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
918                                 
919                                 //write out names to file
920                                 string filename = toString(getpid()) + ".temp";
921                                 ofstream out;
922                                 m->openOutputFile(filename, out);
923                                 out << tag << endl;
924                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
925                                 out.close();
926                                 
927                                 //print out labels
928                                 ofstream outLabels;
929                                 filename = toString(getpid()) + ".temp.labels";
930                                 m->openOutputFile(filename, outLabels);
931                                 
932                                 outLabels << cutoff << endl;
933                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
934                                         outLabels << (*it) << endl;
935                                 }
936                                 outLabels.close();
937
938                                 exit(0);
939                         }else { 
940                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
941                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
942                                 exit(0);
943                         }
944                 }
945                 
946                 //force parent to wait until all the processes are done
947                 for (int i=0;i<processors;i++) { 
948                         int temp = processIDS[i];
949                         wait(&temp);
950                 }
951                 
952                 return exitCommand;
953         #endif          
954         
955         }
956         catch(exception& e) {
957                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
958                 exit(1);
959         }
960 }
961 //**********************************************************************************************************************
962
963 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
964         try {
965                 Cluster* cluster;
966                 SparseMatrix* matrix;
967                 ListVector* list;
968                 ListVector oldList;
969                 RAbundVector* rabund;
970                 
971                 vector<string> listFileNames;
972                 
973                 double smallestCutoff = cutoff;
974                 
975                 //cluster each distance file
976                 for (int i = 0; i < distNames.size(); i++) {
977                         if (m->control_pressed) { return listFileNames; }
978                         
979                         string thisNamefile = distNames[i].begin()->second;
980                         string thisDistFile = distNames[i].begin()->first;
981                                                 
982                         #ifdef USE_MPI
983                                 int pid;
984                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
985                                 
986                                 //output your files too
987                                 if (pid != 0) {
988                                         cout << endl << "Reading " << thisDistFile << endl;
989                                 }
990                         #endif
991                         
992                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
993                         
994                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
995                         read->setCutoff(cutoff);
996
997                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
998                         nameMap->readMap();
999                         read->read(nameMap);
1000                         
1001                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
1002                         
1003                         list = read->getListVector();
1004                         oldList = *list;
1005                         matrix = read->getMatrix();
1006                         
1007                         delete read; 
1008                         delete nameMap; 
1009                         
1010                         
1011                         #ifdef USE_MPI
1012                                 //output your files too
1013                                 if (pid != 0) {
1014                                         cout << endl << "Clustering " << thisDistFile << endl;
1015                                 }
1016                         #endif
1017                         
1018                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1019                 
1020                         rabund = new RAbundVector(list->getRAbundVector());
1021                         
1022                         //create cluster
1023                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1024                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1025                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1026                         tag = cluster->getTag();
1027                 
1028                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1029                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1030                         
1031                         ofstream listFile;
1032                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1033                 
1034                         listFileNames.push_back(fileroot+ tag + ".list");
1035                                 
1036                         float previousDist = 0.00000;
1037                         float rndPreviousDist = 0.00000;
1038                         
1039                         oldList = *list;
1040
1041                         print_start = true;
1042                         start = time(NULL);
1043                         double saveCutoff = cutoff;
1044                 
1045                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1046                 
1047                                 if (m->control_pressed) { //clean up
1048                                         delete matrix; delete list;     delete cluster; delete rabund;
1049                                         listFile.close();
1050                                         for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
1051                                         listFileNames.clear(); return listFileNames;
1052                                 }
1053                 
1054                                 cluster->update(saveCutoff);
1055         
1056                                 float dist = matrix->getSmallDist();
1057                                 float rndDist;
1058                                 if (hard) {
1059                                         rndDist = m->ceilDist(dist, precision); 
1060                                 }else{
1061                                         rndDist = m->roundDist(dist, precision); 
1062                                 }
1063
1064                                 if(previousDist <= 0.0000 && dist != previousDist){
1065                                         oldList.setLabel("unique");
1066                                         oldList.print(listFile);
1067                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1068                                 }
1069                                 else if(rndDist != rndPreviousDist){
1070                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1071                                         oldList.print(listFile);
1072                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1073                                 }
1074                 
1075                                 previousDist = dist;
1076                                 rndPreviousDist = rndDist;
1077                                 oldList = *list;
1078                         }
1079
1080                 
1081                         if(previousDist <= 0.0000){
1082                                 oldList.setLabel("unique");
1083                                 oldList.print(listFile);
1084                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1085                         }
1086                         else if(rndPreviousDist<cutoff){
1087                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1088                                 oldList.print(listFile);
1089                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1090                         }
1091         
1092                         delete matrix; delete list;     delete cluster; delete rabund; 
1093                         listFile.close();
1094                         
1095                         if (m->control_pressed) { //clean up
1096                                 for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
1097                                 listFileNames.clear(); return listFileNames;
1098                         }
1099                         
1100                         m->mothurRemove(thisDistFile);
1101                         m->mothurRemove(thisNamefile);
1102                         
1103                         if (saveCutoff != cutoff) { 
1104                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1105                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1106                         
1107                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
1108                         }
1109                         
1110                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1111                 }
1112                 
1113                 cutoff = smallestCutoff;
1114                                         
1115                 return listFileNames;
1116         
1117         }
1118         catch(exception& e) {
1119                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1120                 exit(1);
1121         }
1122
1123
1124 }
1125 //**********************************************************************************************************************
1126
1127 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1128         try{
1129                 
1130 #ifdef USE_MPI
1131                 int pid;
1132                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1133                 
1134                 if (pid != 0) {
1135 #endif
1136                 
1137                 string thisOutputDir = outputDir;
1138                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1139                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1140                 m->mothurRemove(outputFileName);
1141                 
1142                 
1143                 for (int i = 0; i < distNames.size(); i++) {
1144                         if (m->control_pressed) {  return 0; }
1145                         
1146                         string thisDistFile = distNames[i].begin()->first;
1147                         
1148                         m->appendFiles(thisDistFile, outputFileName);
1149                 }       
1150                         
1151                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1152                         
1153 #ifdef USE_MPI
1154                 }
1155 #endif
1156                                 
1157                 return 0;       
1158                 
1159                 
1160         }
1161         catch(exception& e) {
1162                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1163                 exit(1);
1164         }
1165 }
1166 //**********************************************************************************************************************