clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::setParameters(){
  21         try {
  22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
  23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
  24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
  25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
  26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
  27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel);
  28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
  29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
  30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
  31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
  32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
  33                 CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff);
  34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
  35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
  36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
  37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
  38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
  39
  40                 vector<string> myArray;
  41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
  42                 return myArray;
  43         }
  44         catch(exception& e) {
  45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
  46                 exit(1);
  47         }
  48 }
  49 //**********************************************************************************************************************
  50 string ClusterSplitCommand::getHelpString(){
  51         try {
  52                 string helpString = "";
  53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
  54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
  55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
  56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
  57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
  58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
  59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
  60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
  61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
  62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
  63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n";
  64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
  65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
  66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
  67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
  68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
  69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
  70 #ifdef USE_MPI
  71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  72 #endif
  73                 helpString += "The cluster.split command should be in the following format: \n";
  74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
  75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";
  76                 return helpString;
  77         }
  78         catch(exception& e) {
  79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
  80                 exit(1);
  81         }
  82 }
  83 //**********************************************************************************************************************
  84 ClusterSplitCommand::ClusterSplitCommand(){
  85         try {
  86                 abort = true; calledHelp = true;
  87                 setParameters();
  88                 vector<string> tempOutNames;
  89                 outputTypes["list"] = tempOutNames;
  90                 outputTypes["rabund"] = tempOutNames;
  91                 outputTypes["sabund"] = tempOutNames;
  92                 outputTypes["column"] = tempOutNames;
  93         }
  94         catch(exception& e) {
  95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  96                 exit(1);
  97         }
  98 }
  99 //**********************************************************************************************************************
 100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
 101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
 102         try{
 103                 abort = false; calledHelp = false;
 104                 format = "";
 105
 106                 //allow user to run help
 107                 if(option == "help") { help(); abort = true; calledHelp = true; }
 108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
 109
 110                 else {
 111                         vector<string> myArray = setParameters();
 112
 113                         OptionParser parser(option);
 114                         map<string,string> parameters = parser.getParameters();
 115
 116                         ValidParameters validParameter("cluster.split");
 117
 118                         //check to make sure all parameters are valid for command
 119                         map<string,string>::iterator it;
 120                         for (it = parameters.begin(); it != parameters.end(); it++) {
 121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
 122                                         abort = true;
 123                                 }
 124                         }
 125
 126                         //initialize outputTypes
 127                         vector<string> tempOutNames;
 128                         outputTypes["list"] = tempOutNames;
 129                         outputTypes["rabund"] = tempOutNames;
 130                         outputTypes["sabund"] = tempOutNames;
 131                         outputTypes["column"] = tempOutNames;
 132
 133                         //if the user changes the output directory command factory will send this info to us in the output parameter
 134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 135
 136                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 138                         if (inputDir == "not found"){   inputDir = "";          }
 139                         else {
 140                                 string path;
 141                                 it = parameters.find("phylip");
 142                                 //user has given a template file
 143                                 if(it != parameters.end()){
 144                                         path = m->hasPath(it->second);
 145                                         //if the user has not given a path then, add inputdir. else leave path alone.
 146                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 147                                 }
 148
 149                                 it = parameters.find("column");
 150                                 //user has given a template file
 151                                 if(it != parameters.end()){
 152                                         path = m->hasPath(it->second);
 153                                         //if the user has not given a path then, add inputdir. else leave path alone.
 154                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 155                                 }
 156
 157                                 it = parameters.find("name");
 158                                 //user has given a template file
 159                                 if(it != parameters.end()){
 160                                         path = m->hasPath(it->second);
 161                                         //if the user has not given a path then, add inputdir. else leave path alone.
 162                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 163                                 }
 164
 165                                 it = parameters.find("taxonomy");
 166                                 //user has given a template file
 167                                 if(it != parameters.end()){
 168                                         path = m->hasPath(it->second);
 169                                         //if the user has not given a path then, add inputdir. else leave path alone.
 170                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 171                                 }
 172
 173                                 it = parameters.find("fasta");
 174                                 //user has given a template file
 175                                 if(it != parameters.end()){
 176                                         path = m->hasPath(it->second);
 177                                         //if the user has not given a path then, add inputdir. else leave path alone.
 178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 179                                 }
 180                         }
 181
 182                         //check for required parameters
 183                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 184                         if (phylipfile == "not open") { abort = true; }
 185                         else if (phylipfile == "not found") { phylipfile = ""; }
 186                         else {  distfile = phylipfile;  format = "phylip";      m->setPhylipFile(phylipfile); }
 187
 188                         columnfile = validParameter.validFile(parameters, "column", true);
 189                         if (columnfile == "not open") { abort = true; }
 190                         else if (columnfile == "not found") { columnfile = ""; }
 191                         else {  distfile = columnfile; format = "column";       m->setColumnFile(columnfile); }
 192
 193                         namefile = validParameter.validFile(parameters, "name", true);
 194                         if (namefile == "not open") { abort = true; }
 195                         else if (namefile == "not found") { namefile = "";  }
 196                         else { m->setNameFile(namefile); }
 197
 198                         fastafile = validParameter.validFile(parameters, "fasta", true);
 199                         if (fastafile == "not open") { abort = true; }
 200                         else if (fastafile == "not found") { fastafile = ""; }
 201                         else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
 202
 203                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 204                         if (taxFile == "not open") { abort = true; }
 205                         else if (taxFile == "not found") { taxFile = ""; }
 206                         else {  m->setTaxonomyFile(taxFile); }
 207
 208                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) {
 209                                 //is there are current file available for either of these?
 210                                 //give priority to column, then phylip, then fasta
 211                                 columnfile = m->getColumnFile();
 212                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
 213                                 else {
 214                                         phylipfile = m->getPhylipFile();
 215                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
 216                                         else {
 217                                                 fastafile = m->getFastaFile();
 218                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
 219                                                 else {
 220                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine();
 221                                                         abort = true;
 222                                                 }
 223                                         }
 224                                 }
 225                         }
 226                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 227
 228                         if (columnfile != "") {
 229                                 if (namefile == "") {
 230                                         namefile = m->getNameFile();
 231                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 232                                         else {
 233                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
 234                                                 abort = true;
 235                                         }
 236                                 }
 237                         }
 238
 239                         if (fastafile != "") {
 240                                 if (taxFile == "") {
 241                                         taxFile = m->getTaxonomyFile();
 242                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
 243                                         else {
 244                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 245                                                 abort = true;
 246                                         }
 247                                 }
 248
 249                                 if (namefile == "") {
 250                                         namefile = m->getNameFile();
 251                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 252                                         else {
 253                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 254                                                 abort = true;
 255                                         }
 256                                 }
 257                         }
 258
 259                         //check for optional parameter and set defaults
 260                         // ...at some point should added some additional type checking...
 261                         //get user cutoff and precision or use defaults
 262                         string temp;
 263                         temp = validParameter.validFile(parameters, "precision", false);
 264                         if (temp == "not found") { temp = "100"; }
 265                         //saves precision legnth for formatting below
 266                         length = temp.length();
 267                         m->mothurConvert(temp, precision);
 268
 269                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
 270                         hard = m->isTrue(temp);
 271
 272                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 273                         large = m->isTrue(temp);
 274
 275                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
 276                         m->setProcessors(temp);
 277                         m->mothurConvert(temp, processors);
 278
 279                         temp = validParameter.validFile(parameters, "splitmethod", false);
 280                         if (splitmethod != "fasta") {
 281                                 if (temp == "not found")  { splitmethod = "distance"; }
 282                                 else {  splitmethod = temp; }
 283                         }
 284
 285                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
 286                         m->mothurConvert(temp, cutoff);
 287                         cutoff += (5 / (precision * 10.0));
 288
 289                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "3"; }
 290                         m->mothurConvert(temp, taxLevelCutoff);
 291
 292                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
 293
 294                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 295                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 296
 297                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 298                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 299
 300                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 301
 302                         showabund = validParameter.validFile(parameters, "showabund", false);
 303                         if (showabund == "not found") { showabund = "T"; }
 304
 305                         timing = validParameter.validFile(parameters, "timing", false);
 306                         if (timing == "not found") { timing = "F"; }
 307
 308                 }
 309         }
 310         catch(exception& e) {
 311                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 312                 exit(1);
 313         }
 314 }
 315
 316 //**********************************************************************************************************************
 317
 318 int ClusterSplitCommand::execute(){
 319         try {
 320
 321                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 322
 323                 time_t estart;
 324                 vector<string> listFileNames;
 325                 set<string> labels;
 326                 string singletonName = "";
 327                 double saveCutoff = cutoff;
 328
 329                 //****************** file prep work ******************************//
 330                 #ifdef USE_MPI
 331                         int pid;
 332                         int tag = 2001;
 333                         MPI_Status status;
 334                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 335                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 336
 337                         if (pid == 0) { //only process 0 converts and splits
 338
 339                 #endif
 340
 341                 //if user gave a phylip file convert to column file
 342                 if (format == "phylip") {
 343                         estart = time(NULL);
 344                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 345
 346                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 347
 348                         NameAssignment* nameMap = NULL;
 349                         convert->setFormat("phylip");
 350                         convert->read(nameMap);
 351
 352                         if (m->control_pressed) {  delete convert;  return 0;  }
 353
 354                         distfile = convert->getOutputFile();
 355
 356                         //if no names file given with phylip file, create it
 357                         ListVector* listToMakeNameFile =  convert->getListVector();
 358                         if (namefile == "") {  //you need to make a namefile for split matrix
 359                                 ofstream out;
 360                                 namefile = phylipfile + ".names";
 361                                 m->openOutputFile(namefile, out);
 362                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 363                                         string bin = listToMakeNameFile->get(i);
 364                                         out << bin << '\t' << bin << endl;
 365                                 }
 366                                 out.close();
 367                         }
 368                         delete listToMakeNameFile;
 369                         delete convert;
 370
 371                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 372                 }
 373                 if (m->control_pressed) { return 0; }
 374
 375                 estart = time(NULL);
 376                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 377
 378                 //split matrix into non-overlapping groups
 379                 SplitMatrix* split;
 380                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 381                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 382                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 383                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 384
 385                 split->split();
 386
 387                 if (m->control_pressed) { delete split; return 0; }
 388
 389                 singletonName = split->getSingletonNames();
 390                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 391                 delete split;
 392
 393                 //output a merged distance file
 394                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 395
 396
 397                 if (m->control_pressed) { return 0; }
 398
 399                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 400                 estart = time(NULL);
 401
 402                 //****************** break up files between processes and cluster each file set ******************************//
 403         #ifdef USE_MPI
 404                         ////you are process 0 from above////
 405
 406                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 407                         dividedNames.resize(processors);
 408
 409                         //for each file group figure out which process will complete it
 410                         //want to divide the load intelligently so the big files are spread between processes
 411                         for (int i = 0; i < distName.size(); i++) {
 412                                 int processToAssign = (i+1) % processors;
 413                                 if (processToAssign == 0) { processToAssign = processors; }
 414
 415                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 416                         }
 417
 418                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 419                         for (int i = 0; i < processors; i++) {
 420                                 int remainder = ((i+1) % processors);
 421                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 422                         }
 423
 424
 425                         //send each child the list of files it needs to process
 426                         for(int i = 1; i < processors; i++) {
 427                                 //send number of file pairs
 428                                 int num = dividedNames[i].size();
 429                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 430
 431                                 for (int j = 0; j < num; j++) { //send filenames to process i
 432                                         char tempDistFileName[1024];
 433                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 434                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 435
 436                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 437                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 438
 439                                         char tempNameFileName[1024];
 440                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 441                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 442
 443                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 444                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 445                                 }
 446                         }
 447
 448                         //process your share
 449                         listFileNames = cluster(dividedNames[0], labels);
 450
 451                         //receive the other processes info
 452                         for(int i = 1; i < processors; i++) {
 453                                 int num = dividedNames[i].size();
 454
 455                                 double tempCutoff;
 456                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 457                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 458
 459                                 //send list filenames to root process
 460                                 for (int j = 0; j < num; j++) {
 461                                         int lengthList = 0;
 462                                         char tempListFileName[1024];
 463
 464                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 465                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 466
 467                                         string myListFileName = tempListFileName;
 468                                         myListFileName = myListFileName.substr(0, lengthList);
 469
 470                                         listFileNames.push_back(myListFileName);
 471                                 }
 472
 473                                 //send Labels to root process
 474                                 int numLabels = 0;
 475                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 476
 477                                 for (int j = 0; j < numLabels; j++) {
 478                                         int lengthLabel = 0;
 479                                         char tempLabel[100];
 480
 481                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 482                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 483
 484                                         string myLabel = tempLabel;
 485                                         myLabel = myLabel.substr(0, lengthLabel);
 486
 487                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 488                                 }
 489                         }
 490
 491                 }else { //you are a child process
 492                         vector < map<string, string> >  myNames;
 493
 494                         //recieve the files you need to process
 495                         //receive number of file pairs
 496                         int num = 0;
 497                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 498
 499                         myNames.resize(num);
 500
 501                         for (int j = 0; j < num; j++) { //receive filenames to process
 502                                 int lengthDist = 0;
 503                                 char tempDistFileName[1024];
 504
 505                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 506                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 507
 508                                 string myDistFileName = tempDistFileName;
 509                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 510
 511                                 int lengthName = 0;
 512                                 char tempNameFileName[1024];
 513
 514                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 515                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 516
 517                                 string myNameFileName = tempNameFileName;
 518                                 myNameFileName = myNameFileName.substr(0, lengthName);
 519
 520                                 //save file name
 521                                 myNames[j][myDistFileName] = myNameFileName;
 522                         }
 523
 524                         //process them
 525                         listFileNames = cluster(myNames, labels);
 526
 527                         //send cutoff
 528                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 529
 530                         //send list filenames to root process
 531                         for (int j = 0; j < num; j++) {
 532                                 char tempListFileName[1024];
 533                                 strcpy(tempListFileName, listFileNames[j].c_str());
 534                                 int lengthList = listFileNames[j].length();
 535
 536                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 537                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 538                         }
 539
 540                         //send Labels to root process
 541                         int numLabels = labels.size();
 542                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 543
 544                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 545                                 char tempLabel[100];
 546                                 strcpy(tempLabel, (*it).c_str());
 547                                 int lengthLabel = (*it).length();
 548
 549                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 550                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 551                         }
 552                 }
 553
 554                 //make everyone wait
 555                 MPI_Barrier(MPI_COMM_WORLD);
 556
 557         #else
 558
 559                 //sanity check
 560                 if (processors > distName.size()) { processors = distName.size(); }
 561
 562                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 563                                 if(processors == 1){
 564                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 565                                 }else{
 566
 567                                         //cout << processors << '\t' << distName.size() << endl;
 568                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 569                                         dividedNames.resize(processors);
 570
 571                                         //for each file group figure out which process will complete it
 572                                         //want to divide the load intelligently so the big files are spread between processes
 573                                         for (int i = 0; i < distName.size(); i++) {
 574                                                 //cout << i << endl;
 575                                                 int processToAssign = (i+1) % processors;
 576                                                 if (processToAssign == 0) { processToAssign = processors; }
 577
 578                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 579                                         }
 580
 581                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 582                                         for (int i = 0; i < processors; i++) {
 583                                                 //cout << i << endl;
 584                                                 int remainder = ((i+1) % processors);
 585                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 586                                         }
 587
 588                                         createProcesses(dividedNames);
 589
 590                                         if (m->control_pressed) { return 0; }
 591
 592                                         //get list of list file names from each process
 593                                         for(int i=0;i<processors;i++){
 594                                                 string filename = toString(processIDS[i]) + ".temp";
 595                                                 ifstream in;
 596                                                 m->openInputFile(filename, in);
 597
 598                                                 in >> tag; m->gobble(in);
 599
 600                                                 while(!in.eof()) {
 601                                                         string tempName;
 602                                                         in >> tempName; m->gobble(in);
 603                                                         listFileNames.push_back(tempName);
 604                                                 }
 605                                                 in.close();
 606                                                 m->mothurRemove((toString(processIDS[i]) + ".temp"));
 607
 608                                                 //get labels
 609                                                 filename = toString(processIDS[i]) + ".temp.labels";
 610                                                 ifstream in2;
 611                                                 m->openInputFile(filename, in2);
 612
 613                                                 float tempCutoff;
 614                                                 in2 >> tempCutoff; m->gobble(in2);
 615                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 616
 617                                                 while(!in2.eof()) {
 618                                                         string tempName;
 619                                                         in2 >> tempName; m->gobble(in2);
 620                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 621                                                 }
 622                                                 in2.close();
 623                                                 m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
 624                                         }
 625                                 }
 626                 #else
 627                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 628                 #endif
 629         #endif
 630                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } return 0; }
 631
 632                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 633
 634                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 635
 636                 //****************** merge list file and create rabund and sabund files ******************************//
 637                 estart = time(NULL);
 638                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 639
 640                 #ifdef USE_MPI
 641                         if (pid == 0) { //only process 0 merges
 642                 #endif
 643
 644                 ListVector* listSingle;
 645                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 646
 647                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
 648
 649                 mergeLists(listFileNames, labelBins, listSingle);
 650
 651                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
 652
 653                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 654
 655                 //set list file as new current listfile
 656                 string current = "";
 657                 itTypes = outputTypes.find("list");
 658                 if (itTypes != outputTypes.end()) {
 659                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
 660                 }
 661
 662                 //set rabund file as new current rabundfile
 663                 itTypes = outputTypes.find("rabund");
 664                 if (itTypes != outputTypes.end()) {
 665                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
 666                 }
 667
 668                 //set sabund file as new current sabundfile
 669                 itTypes = outputTypes.find("sabund");
 670                 if (itTypes != outputTypes.end()) {
 671                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
 672                 }
 673
 674                 m->mothurOutEndLine();
 675                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 676                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 677                 m->mothurOutEndLine();
 678
 679                 #ifdef USE_MPI
 680                         } //only process 0 merges
 681
 682                         //make everyone wait
 683                         MPI_Barrier(MPI_COMM_WORLD);
 684                 #endif
 685
 686                 return 0;
 687         }
 688         catch(exception& e) {
 689                 m->errorOut(e, "ClusterSplitCommand", "execute");
 690                 exit(1);
 691         }
 692 }
 693 //**********************************************************************************************************************
 694 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 695         try {
 696
 697                 map<float, int> labelBin;
 698                 vector<float> orderFloat;
 699                 int numSingleBins;
 700
 701                 //read in singletons
 702                 if (singleton != "none") {
 703                         ifstream in;
 704                         m->openInputFile(singleton, in);
 705
 706                         string firstCol, secondCol;
 707                         listSingle = new ListVector();
 708                         while (!in.eof()) {
 709                                 in >> firstCol >> secondCol; m->gobble(in);
 710                                 listSingle->push_back(secondCol);
 711                         }
 712                         in.close();
 713                         m->mothurRemove(singleton);
 714
 715                         numSingleBins = listSingle->getNumBins();
 716                 }else{  listSingle = NULL; numSingleBins = 0;  }
 717
 718                 //go through users set and make them floats so we can sort them
 719                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 720                         float temp = -10.0;
 721
 722                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 723                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 724
 725                         if (temp <= cutoff) {
 726                                 orderFloat.push_back(temp);
 727                                 labelBin[temp] = numSingleBins; //initialize numbins
 728                         }
 729                 }
 730
 731                 //sort order
 732                 sort(orderFloat.begin(), orderFloat.end());
 733                 userLabels.clear();
 734
 735                 //get the list info from each file
 736                 for (int k = 0; k < listNames.size(); k++) {
 737
 738                         if (m->control_pressed) {
 739                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; m->mothurRemove(singleton);  }
 740                                 for (int i = 0; i < listNames.size(); i++) {   m->mothurRemove(listNames[i]);  }
 741                                 return labelBin;
 742                         }
 743
 744                         InputData* input = new InputData(listNames[k], "list");
 745                         ListVector* list = input->getListVector();
 746                         string lastLabel = list->getLabel();
 747
 748                         string filledInList = listNames[k] + "filledInTemp";
 749                         ofstream outFilled;
 750                         m->openOutputFile(filledInList, outFilled);
 751
 752                         //for each label needed
 753                         for(int l = 0; l < orderFloat.size(); l++){
 754
 755                                 string thisLabel;
 756                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 757                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 758
 759                                 //this file has reached the end
 760                                 if (list == NULL) {
 761                                         list = input->getListVector(lastLabel, true);
 762                                 }else{  //do you have the distance, or do you need to fill in
 763
 764                                         float labelFloat;
 765                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 766                                         else { convert(list->getLabel(), labelFloat); }
 767
 768                                         //check for missing labels
 769                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 770                                                 //if its bigger get last label, otherwise keep it
 771                                                 delete list;
 772                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 773                                         }
 774                                         lastLabel = list->getLabel();
 775                                 }
 776
 777                                 //print to new file
 778                                 list->setLabel(thisLabel);
 779                                 list->print(outFilled);
 780
 781                                 //update labelBin
 782                                 labelBin[orderFloat[l]] += list->getNumBins();
 783
 784                                 delete list;
 785
 786                                 list = input->getListVector();
 787                         }
 788
 789                         if (list != NULL) { delete list; }
 790                         delete input;
 791
 792                         outFilled.close();
 793                         m->mothurRemove(listNames[k]);
 794                         rename(filledInList.c_str(), listNames[k].c_str());
 795                 }
 796
 797                 return labelBin;
 798         }
 799         catch(exception& e) {
 800                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 801                 exit(1);
 802         }
 803 }
 804 //**********************************************************************************************************************
 805 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 806         try {
 807                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 808                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 809
 810                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 811                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 812                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 813
 814                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 815                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 816                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 817
 818                 map<float, int>::iterator itLabel;
 819
 820                 //for each label needed
 821                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 822
 823                         string thisLabel;
 824                         if (itLabel->first == -1) { thisLabel = "unique"; }
 825                         else { thisLabel = toString(itLabel->first,  length-1);  }
 826
 827                         outList << thisLabel << '\t' << itLabel->second << '\t';
 828
 829                         RAbundVector* rabund = new RAbundVector();
 830                         rabund->setLabel(thisLabel);
 831
 832                         //add in singletons
 833                         if (listSingle != NULL) {
 834                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 835                                         outList << listSingle->get(j) << '\t';
 836                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 837                                 }
 838                         }
 839
 840                         //get the list info from each file
 841                         for (int k = 0; k < listNames.size(); k++) {
 842
 843                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]);  } delete rabund; return 0; }
 844
 845                                 InputData* input = new InputData(listNames[k], "list");
 846                                 ListVector* list = input->getListVector(thisLabel);
 847
 848                                 //this file has reached the end
 849                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 850                                 else {
 851                                         for (int j = 0; j < list->getNumBins(); j++) {
 852                                                 outList << list->get(j) << '\t';
 853                                                 rabund->push_back(m->getNumNames(list->get(j)));
 854                                         }
 855                                         delete list;
 856                                 }
 857                                 delete input;
 858                         }
 859
 860                         SAbundVector sabund = rabund->getSAbundVector();
 861
 862                         sabund.print(outSabund);
 863                         rabund->print(outRabund);
 864                         outList << endl;
 865
 866                         delete rabund;
 867                 }
 868
 869                 outList.close();
 870                 outRabund.close();
 871                 outSabund.close();
 872
 873                 if (listSingle != NULL) { delete listSingle;  }
 874
 875                 for (int i = 0; i < listNames.size(); i++) {  m->mothurRemove(listNames[i]);  }
 876
 877                 return 0;
 878         }
 879         catch(exception& e) {
 880                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 881                 exit(1);
 882         }
 883 }
 884
 885 //**********************************************************************************************************************
 886
 887 void ClusterSplitCommand::printData(ListVector* oldList){
 888         try {
 889                 string label = oldList->getLabel();
 890                 RAbundVector oldRAbund = oldList->getRAbundVector();
 891
 892                 oldRAbund.setLabel(label);
 893                 if (m->isTrue(showabund)) {
 894                         oldRAbund.getSAbundVector().print(cout);
 895                 }
 896                 oldRAbund.print(outRabund);
 897                 oldRAbund.getSAbundVector().print(outSabund);
 898
 899                 oldList->print(outList);
 900         }
 901         catch(exception& e) {
 902                 m->errorOut(e, "ClusterSplitCommand", "printData");
 903                 exit(1);
 904         }
 905 }
 906 //**********************************************************************************************************************
 907 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 908         try {
 909
 910         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 911                 int process = 0;
 912                 int exitCommand = 1;
 913                 processIDS.clear();
 914
 915                 //loop through and create all the processes you want
 916                 while (process != processors) {
 917                         int pid = fork();
 918
 919                         if (pid > 0) {
 920                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 921                                 process++;
 922                         }else if (pid == 0){
 923                                 set<string> labels;
 924                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 925
 926                                 //write out names to file
 927                                 string filename = toString(getpid()) + ".temp";
 928                                 ofstream out;
 929                                 m->openOutputFile(filename, out);
 930                                 out << tag << endl;
 931                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 932                                 out.close();
 933
 934                                 //print out labels
 935                                 ofstream outLabels;
 936                                 filename = toString(getpid()) + ".temp.labels";
 937                                 m->openOutputFile(filename, outLabels);
 938
 939                                 outLabels << cutoff << endl;
 940                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 941                                         outLabels << (*it) << endl;
 942                                 }
 943                                 outLabels.close();
 944
 945                                 exit(0);
 946                         }else {
 947                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 948                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 949                                 exit(0);
 950                         }
 951                 }
 952
 953                 //force parent to wait until all the processes are done
 954                 for (int i=0;i<processors;i++) {
 955                         int temp = processIDS[i];
 956                         wait(&temp);
 957                 }
 958
 959                 return exitCommand;
 960         #endif
 961
 962         }
 963         catch(exception& e) {
 964                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 965                 exit(1);
 966         }
 967 }
 968 //**********************************************************************************************************************
 969
 970 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 971         try {
 972                 Cluster* cluster;
 973                 SparseMatrix* matrix;
 974                 ListVector* list;
 975                 ListVector oldList;
 976                 RAbundVector* rabund;
 977
 978                 vector<string> listFileNames;
 979
 980                 double smallestCutoff = cutoff;
 981
 982                 //cluster each distance file
 983                 for (int i = 0; i < distNames.size(); i++) {
 984                         if (m->control_pressed) { return listFileNames; }
 985
 986                         string thisNamefile = distNames[i].begin()->second;
 987                         string thisDistFile = distNames[i].begin()->first;
 988
 989                         #ifdef USE_MPI
 990                                 int pid;
 991                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 992
 993                                 //output your files too
 994                                 if (pid != 0) {
 995                                         cout << endl << "Reading " << thisDistFile << endl;
 996                                 }
 997                         #endif
 998
 999                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
1000
1001                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
1002                         read->setCutoff(cutoff);
1003
1004                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
1005                         nameMap->readMap();
1006                         read->read(nameMap);
1007
1008                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
1009
1010                         list = read->getListVector();
1011                         oldList = *list;
1012                         matrix = read->getMatrix();
1013
1014                         delete read;
1015                         delete nameMap;
1016
1017
1018                         #ifdef USE_MPI
1019                                 //output your files too
1020                                 if (pid != 0) {
1021                                         cout << endl << "Clustering " << thisDistFile << endl;
1022                                 }
1023                         #endif
1024
1025                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1026
1027                         rabund = new RAbundVector(list->getRAbundVector());
1028
1029                         //create cluster
1030                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1031                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1032                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1033                         tag = cluster->getTag();
1034
1035                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1036                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1037
1038                         ofstream listFile;
1039                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1040
1041                         listFileNames.push_back(fileroot+ tag + ".list");
1042
1043                         float previousDist = 0.00000;
1044                         float rndPreviousDist = 0.00000;
1045
1046                         oldList = *list;
1047
1048                         print_start = true;
1049                         start = time(NULL);
1050                         double saveCutoff = cutoff;
1051
1052                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1053
1054                                 if (m->control_pressed) { //clean up
1055                                         delete matrix; delete list;     delete cluster; delete rabund;
1056                                         listFile.close();
1057                                         for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
1058                                         listFileNames.clear(); return listFileNames;
1059                                 }
1060
1061                                 cluster->update(saveCutoff);
1062
1063                                 float dist = matrix->getSmallDist();
1064                                 float rndDist;
1065                                 if (hard) {
1066                                         rndDist = m->ceilDist(dist, precision);
1067                                 }else{
1068                                         rndDist = m->roundDist(dist, precision);
1069                                 }
1070
1071                                 if(previousDist <= 0.0000 && dist != previousDist){
1072                                         oldList.setLabel("unique");
1073                                         oldList.print(listFile);
1074                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1075                                 }
1076                                 else if(rndDist != rndPreviousDist){
1077                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1078                                         oldList.print(listFile);
1079                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1080                                 }
1081
1082                                 previousDist = dist;
1083                                 rndPreviousDist = rndDist;
1084                                 oldList = *list;
1085                         }
1086
1087
1088                         if(previousDist <= 0.0000){
1089                                 oldList.setLabel("unique");
1090                                 oldList.print(listFile);
1091                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1092                         }
1093                         else if(rndPreviousDist<cutoff){
1094                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1095                                 oldList.print(listFile);
1096                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1097                         }
1098
1099                         delete matrix; delete list;     delete cluster; delete rabund;
1100                         listFile.close();
1101
1102                         if (m->control_pressed) { //clean up
1103                                 for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
1104                                 listFileNames.clear(); return listFileNames;
1105                         }
1106
1107                         m->mothurRemove(thisDistFile);
1108                         m->mothurRemove(thisNamefile);
1109
1110                         if (saveCutoff != cutoff) {
1111                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1112                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1113
1114                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1115                         }
1116
1117                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1118                 }
1119
1120                 cutoff = smallestCutoff;
1121
1122                 return listFileNames;
1123
1124         }
1125         catch(exception& e) {
1126                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1127                 exit(1);
1128         }
1129
1130
1131 }
1132 //**********************************************************************************************************************
1133
1134 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1135         try{
1136
1137 #ifdef USE_MPI
1138                 int pid;
1139                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1140
1141                 if (pid != 0) {
1142 #endif
1143
1144                 string thisOutputDir = outputDir;
1145                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1146                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1147                 m->mothurRemove(outputFileName);
1148
1149
1150                 for (int i = 0; i < distNames.size(); i++) {
1151                         if (m->control_pressed) {  return 0; }
1152
1153                         string thisDistFile = distNames[i].begin()->first;
1154
1155                         m->appendFiles(thisDistFile, outputFileName);
1156                 }
1157
1158                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1159
1160 #ifdef USE_MPI
1161                 }
1162 #endif
1163
1164                 return 0;
1165
1166
1167         }
1168         catch(exception& e) {
1169                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1170                 exit(1);
1171         }
1172 }
1173 //**********************************************************************************************************************