clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::setParameters(){
  21         try {
  22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
  23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
  24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
  25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
  26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
  27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
  28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
  29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
  30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
  31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
  32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
  33                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
  34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
  35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
  36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
  37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
  38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
  39
  40                 vector<string> myArray;
  41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
  42                 return myArray;
  43         }
  44         catch(exception& e) {
  45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
  46                 exit(1);
  47         }
  48 }
  49 //**********************************************************************************************************************
  50 string ClusterSplitCommand::getHelpString(){
  51         try {
  52                 string helpString = "";
  53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
  54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
  55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
  56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
  57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
  58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
  59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
  60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
  61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
  62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
  63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
  64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
  65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
  66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
  67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
  68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
  69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
  70 #ifdef USE_MPI
  71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  72 #endif
  73                 helpString += "The cluster.split command should be in the following format: \n";
  74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
  75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";
  76                 return helpString;
  77         }
  78         catch(exception& e) {
  79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
  80                 exit(1);
  81         }
  82 }
  83 //**********************************************************************************************************************
  84 ClusterSplitCommand::ClusterSplitCommand(){
  85         try {
  86                 abort = true; calledHelp = true;
  87                 setParameters();
  88                 vector<string> tempOutNames;
  89                 outputTypes["list"] = tempOutNames;
  90                 outputTypes["rabund"] = tempOutNames;
  91                 outputTypes["sabund"] = tempOutNames;
  92                 outputTypes["column"] = tempOutNames;
  93         }
  94         catch(exception& e) {
  95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  96                 exit(1);
  97         }
  98 }
  99 //**********************************************************************************************************************
 100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
 101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
 102         try{
 103                 abort = false; calledHelp = false;
 104                 format = "";
 105
 106                 //allow user to run help
 107                 if(option == "help") { help(); abort = true; calledHelp = true; }
 108
 109                 else {
 110                         vector<string> myArray = setParameters();
 111
 112                         OptionParser parser(option);
 113                         map<string,string> parameters = parser.getParameters();
 114
 115                         ValidParameters validParameter("cluster.split");
 116
 117                         //check to make sure all parameters are valid for command
 118                         map<string,string>::iterator it;
 119                         for (it = parameters.begin(); it != parameters.end(); it++) {
 120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
 121                                         abort = true;
 122                                 }
 123                         }
 124
 125                         //initialize outputTypes
 126                         vector<string> tempOutNames;
 127                         outputTypes["list"] = tempOutNames;
 128                         outputTypes["rabund"] = tempOutNames;
 129                         outputTypes["sabund"] = tempOutNames;
 130                         outputTypes["column"] = tempOutNames;
 131
 132                         //if the user changes the output directory command factory will send this info to us in the output parameter
 133                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 134
 135                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 136                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 137                         if (inputDir == "not found"){   inputDir = "";          }
 138                         else {
 139                                 string path;
 140                                 it = parameters.find("phylip");
 141                                 //user has given a template file
 142                                 if(it != parameters.end()){
 143                                         path = m->hasPath(it->second);
 144                                         //if the user has not given a path then, add inputdir. else leave path alone.
 145                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 146                                 }
 147
 148                                 it = parameters.find("column");
 149                                 //user has given a template file
 150                                 if(it != parameters.end()){
 151                                         path = m->hasPath(it->second);
 152                                         //if the user has not given a path then, add inputdir. else leave path alone.
 153                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 154                                 }
 155
 156                                 it = parameters.find("name");
 157                                 //user has given a template file
 158                                 if(it != parameters.end()){
 159                                         path = m->hasPath(it->second);
 160                                         //if the user has not given a path then, add inputdir. else leave path alone.
 161                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 162                                 }
 163
 164                                 it = parameters.find("taxonomy");
 165                                 //user has given a template file
 166                                 if(it != parameters.end()){
 167                                         path = m->hasPath(it->second);
 168                                         //if the user has not given a path then, add inputdir. else leave path alone.
 169                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 170                                 }
 171
 172                                 it = parameters.find("fasta");
 173                                 //user has given a template file
 174                                 if(it != parameters.end()){
 175                                         path = m->hasPath(it->second);
 176                                         //if the user has not given a path then, add inputdir. else leave path alone.
 177                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 178                                 }
 179                         }
 180
 181                         //check for required parameters
 182                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 183                         if (phylipfile == "not open") { abort = true; }
 184                         else if (phylipfile == "not found") { phylipfile = ""; }
 185                         else {  distfile = phylipfile;  format = "phylip";      }
 186
 187                         columnfile = validParameter.validFile(parameters, "column", true);
 188                         if (columnfile == "not open") { abort = true; }
 189                         else if (columnfile == "not found") { columnfile = ""; }
 190                         else {  distfile = columnfile; format = "column";       }
 191
 192                         namefile = validParameter.validFile(parameters, "name", true);
 193                         if (namefile == "not open") { abort = true; }
 194                         else if (namefile == "not found") { namefile = ""; }
 195
 196                         fastafile = validParameter.validFile(parameters, "fasta", true);
 197                         if (fastafile == "not open") { abort = true; }
 198                         else if (fastafile == "not found") { fastafile = ""; }
 199                         else { distfile = fastafile;  splitmethod = "fasta";  }
 200
 201                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 202                         if (taxFile == "not open") { abort = true; }
 203                         else if (taxFile == "not found") { taxFile = ""; }
 204
 205                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) {
 206                                 //is there are current file available for either of these?
 207                                 //give priority to column, then phylip, then fasta
 208                                 columnfile = m->getColumnFile();
 209                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
 210                                 else {
 211                                         phylipfile = m->getPhylipFile();
 212                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
 213                                         else {
 214                                                 fastafile = m->getFastaFile();
 215                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
 216                                                 else {
 217                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine();
 218                                                         abort = true;
 219                                                 }
 220                                         }
 221                                 }
 222                         }
 223                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 224
 225                         if (columnfile != "") {
 226                                 if (namefile == "") {
 227                                         namefile = m->getNameFile();
 228                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 229                                         else {
 230                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
 231                                                 abort = true;
 232                                         }
 233                                 }
 234                         }
 235
 236                         if (fastafile != "") {
 237                                 if (taxFile == "") {
 238                                         taxFile = m->getTaxonomyFile();
 239                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
 240                                         else {
 241                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 242                                                 abort = true;
 243                                         }
 244                                 }
 245
 246                                 if (namefile == "") {
 247                                         namefile = m->getNameFile();
 248                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 249                                         else {
 250                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 251                                                 abort = true;
 252                                         }
 253                                 }
 254                         }
 255
 256                         //check for optional parameter and set defaults
 257                         // ...at some point should added some additional type checking...
 258                         //get user cutoff and precision or use defaults
 259                         string temp;
 260                         temp = validParameter.validFile(parameters, "precision", false);
 261                         if (temp == "not found") { temp = "100"; }
 262                         //saves precision legnth for formatting below
 263                         length = temp.length();
 264                         convert(temp, precision);
 265
 266                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
 267                         hard = m->isTrue(temp);
 268
 269                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 270                         large = m->isTrue(temp);
 271
 272                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
 273                         m->setProcessors(temp);
 274                         convert(temp, processors);
 275
 276                         temp = validParameter.validFile(parameters, "splitmethod", false);
 277                         if (splitmethod != "fasta") {
 278                                 if (temp == "not found")  { splitmethod = "distance"; }
 279                                 else {  splitmethod = temp; }
 280                         }
 281
 282                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 283                         convert(temp, cutoff);
 284                         cutoff += (5 / (precision * 10.0));
 285
 286                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 287                         convert(temp, taxLevelCutoff);
 288
 289                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
 290
 291                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 292                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 293
 294                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 295                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 296
 297                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 298
 299                         showabund = validParameter.validFile(parameters, "showabund", false);
 300                         if (showabund == "not found") { showabund = "T"; }
 301
 302                         timing = validParameter.validFile(parameters, "timing", false);
 303                         if (timing == "not found") { timing = "F"; }
 304
 305                 }
 306         }
 307         catch(exception& e) {
 308                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 309                 exit(1);
 310         }
 311 }
 312
 313 //**********************************************************************************************************************
 314
 315 int ClusterSplitCommand::execute(){
 316         try {
 317
 318                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 319
 320                 time_t estart;
 321                 vector<string> listFileNames;
 322                 set<string> labels;
 323                 string singletonName = "";
 324                 double saveCutoff = cutoff;
 325
 326                 //****************** file prep work ******************************//
 327                 #ifdef USE_MPI
 328                         int pid;
 329                         int tag = 2001;
 330                         MPI_Status status;
 331                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 332                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 333
 334                         if (pid == 0) { //only process 0 converts and splits
 335
 336                 #endif
 337
 338                 //if user gave a phylip file convert to column file
 339                 if (format == "phylip") {
 340                         estart = time(NULL);
 341                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 342
 343                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 344
 345                         NameAssignment* nameMap = NULL;
 346                         convert->setFormat("phylip");
 347                         convert->read(nameMap);
 348
 349                         if (m->control_pressed) {  delete convert;  return 0;  }
 350
 351                         distfile = convert->getOutputFile();
 352
 353                         //if no names file given with phylip file, create it
 354                         ListVector* listToMakeNameFile =  convert->getListVector();
 355                         if (namefile == "") {  //you need to make a namefile for split matrix
 356                                 ofstream out;
 357                                 namefile = phylipfile + ".names";
 358                                 m->openOutputFile(namefile, out);
 359                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 360                                         string bin = listToMakeNameFile->get(i);
 361                                         out << bin << '\t' << bin << endl;
 362                                 }
 363                                 out.close();
 364                         }
 365                         delete listToMakeNameFile;
 366                         delete convert;
 367
 368                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 369                 }
 370                 if (m->control_pressed) { return 0; }
 371
 372                 estart = time(NULL);
 373                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 374
 375                 //split matrix into non-overlapping groups
 376                 SplitMatrix* split;
 377                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 378                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 379                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 380                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 381
 382                 split->split();
 383
 384                 if (m->control_pressed) { delete split; return 0; }
 385
 386                 singletonName = split->getSingletonNames();
 387                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 388                 delete split;
 389
 390                 //output a merged distance file
 391                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 392
 393
 394                 if (m->control_pressed) { return 0; }
 395
 396                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 397                 estart = time(NULL);
 398
 399                 //****************** break up files between processes and cluster each file set ******************************//
 400         #ifdef USE_MPI
 401                         ////you are process 0 from above////
 402
 403                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 404                         dividedNames.resize(processors);
 405
 406                         //for each file group figure out which process will complete it
 407                         //want to divide the load intelligently so the big files are spread between processes
 408                         for (int i = 0; i < distName.size(); i++) {
 409                                 int processToAssign = (i+1) % processors;
 410                                 if (processToAssign == 0) { processToAssign = processors; }
 411
 412                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 413                         }
 414
 415                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 416                         for (int i = 0; i < processors; i++) {
 417                                 int remainder = ((i+1) % processors);
 418                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 419                         }
 420
 421
 422                         //send each child the list of files it needs to process
 423                         for(int i = 1; i < processors; i++) {
 424                                 //send number of file pairs
 425                                 int num = dividedNames[i].size();
 426                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 427
 428                                 for (int j = 0; j < num; j++) { //send filenames to process i
 429                                         char tempDistFileName[1024];
 430                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 431                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 432
 433                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 434                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 435
 436                                         char tempNameFileName[1024];
 437                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 438                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 439
 440                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 441                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 442                                 }
 443                         }
 444
 445                         //process your share
 446                         listFileNames = cluster(dividedNames[0], labels);
 447
 448                         //receive the other processes info
 449                         for(int i = 1; i < processors; i++) {
 450                                 int num = dividedNames[i].size();
 451
 452                                 double tempCutoff;
 453                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 454                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 455
 456                                 //send list filenames to root process
 457                                 for (int j = 0; j < num; j++) {
 458                                         int lengthList = 0;
 459                                         char tempListFileName[1024];
 460
 461                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 462                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 463
 464                                         string myListFileName = tempListFileName;
 465                                         myListFileName = myListFileName.substr(0, lengthList);
 466
 467                                         listFileNames.push_back(myListFileName);
 468                                 }
 469
 470                                 //send Labels to root process
 471                                 int numLabels = 0;
 472                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 473
 474                                 for (int j = 0; j < numLabels; j++) {
 475                                         int lengthLabel = 0;
 476                                         char tempLabel[100];
 477
 478                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 479                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 480
 481                                         string myLabel = tempLabel;
 482                                         myLabel = myLabel.substr(0, lengthLabel);
 483
 484                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 485                                 }
 486                         }
 487
 488                 }else { //you are a child process
 489                         vector < map<string, string> >  myNames;
 490
 491                         //recieve the files you need to process
 492                         //receive number of file pairs
 493                         int num = 0;
 494                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 495
 496                         myNames.resize(num);
 497
 498                         for (int j = 0; j < num; j++) { //receive filenames to process
 499                                 int lengthDist = 0;
 500                                 char tempDistFileName[1024];
 501
 502                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 503                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 504
 505                                 string myDistFileName = tempDistFileName;
 506                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 507
 508                                 int lengthName = 0;
 509                                 char tempNameFileName[1024];
 510
 511                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 512                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 513
 514                                 string myNameFileName = tempNameFileName;
 515                                 myNameFileName = myNameFileName.substr(0, lengthName);
 516
 517                                 //save file name
 518                                 myNames[j][myDistFileName] = myNameFileName;
 519                         }
 520
 521                         //process them
 522                         listFileNames = cluster(myNames, labels);
 523
 524                         //send cutoff
 525                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 526
 527                         //send list filenames to root process
 528                         for (int j = 0; j < num; j++) {
 529                                 char tempListFileName[1024];
 530                                 strcpy(tempListFileName, listFileNames[j].c_str());
 531                                 int lengthList = listFileNames[j].length();
 532
 533                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 534                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 535                         }
 536
 537                         //send Labels to root process
 538                         int numLabels = labels.size();
 539                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 540
 541                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 542                                 char tempLabel[100];
 543                                 strcpy(tempLabel, (*it).c_str());
 544                                 int lengthLabel = (*it).length();
 545
 546                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 547                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 548                         }
 549                 }
 550
 551                 //make everyone wait
 552                 MPI_Barrier(MPI_COMM_WORLD);
 553
 554         #else
 555
 556                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 557                                 if(processors == 1){
 558                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 559                                 }else{
 560                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 561                                         dividedNames.resize(processors);
 562
 563                                         //for each file group figure out which process will complete it
 564                                         //want to divide the load intelligently so the big files are spread between processes
 565                                         for (int i = 0; i < distName.size(); i++) {
 566                                                 int processToAssign = (i+1) % processors;
 567                                                 if (processToAssign == 0) { processToAssign = processors; }
 568
 569                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 570                                         }
 571
 572                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 573                                         for (int i = 0; i < processors; i++) {
 574                                                 int remainder = ((i+1) % processors);
 575                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 576                                         }
 577
 578                                         createProcesses(dividedNames);
 579
 580                                         if (m->control_pressed) { return 0; }
 581
 582                                         //get list of list file names from each process
 583                                         for(int i=0;i<processors;i++){
 584                                                 string filename = toString(processIDS[i]) + ".temp";
 585                                                 ifstream in;
 586                                                 m->openInputFile(filename, in);
 587
 588                                                 in >> tag; m->gobble(in);
 589
 590                                                 while(!in.eof()) {
 591                                                         string tempName;
 592                                                         in >> tempName; m->gobble(in);
 593                                                         listFileNames.push_back(tempName);
 594                                                 }
 595                                                 in.close();
 596                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 597
 598                                                 //get labels
 599                                                 filename = toString(processIDS[i]) + ".temp.labels";
 600                                                 ifstream in2;
 601                                                 m->openInputFile(filename, in2);
 602
 603                                                 float tempCutoff;
 604                                                 in2 >> tempCutoff; m->gobble(in2);
 605                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 606
 607                                                 while(!in2.eof()) {
 608                                                         string tempName;
 609                                                         in2 >> tempName; m->gobble(in2);
 610                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 611                                                 }
 612                                                 in2.close();
 613                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 614                                         }
 615                                 }
 616                 #else
 617                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 618                 #endif
 619         #endif
 620                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 621
 622                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 623
 624                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 625
 626                 //****************** merge list file and create rabund and sabund files ******************************//
 627                 estart = time(NULL);
 628                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 629
 630                 #ifdef USE_MPI
 631                         if (pid == 0) { //only process 0 merges
 632                 #endif
 633
 634                 ListVector* listSingle;
 635                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 636
 637                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 638
 639                 mergeLists(listFileNames, labelBins, listSingle);
 640
 641                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 642
 643                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 644
 645                 //set list file as new current listfile
 646                 string current = "";
 647                 itTypes = outputTypes.find("list");
 648                 if (itTypes != outputTypes.end()) {
 649                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
 650                 }
 651
 652                 //set rabund file as new current rabundfile
 653                 itTypes = outputTypes.find("rabund");
 654                 if (itTypes != outputTypes.end()) {
 655                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
 656                 }
 657
 658                 //set sabund file as new current sabundfile
 659                 itTypes = outputTypes.find("sabund");
 660                 if (itTypes != outputTypes.end()) {
 661                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
 662                 }
 663
 664                 m->mothurOutEndLine();
 665                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 666                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 667                 m->mothurOutEndLine();
 668
 669                 #ifdef USE_MPI
 670                         } //only process 0 merges
 671
 672                         //make everyone wait
 673                         MPI_Barrier(MPI_COMM_WORLD);
 674                 #endif
 675
 676                 return 0;
 677         }
 678         catch(exception& e) {
 679                 m->errorOut(e, "ClusterSplitCommand", "execute");
 680                 exit(1);
 681         }
 682 }
 683 //**********************************************************************************************************************
 684 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 685         try {
 686
 687                 map<float, int> labelBin;
 688                 vector<float> orderFloat;
 689                 int numSingleBins;
 690
 691                 //read in singletons
 692                 if (singleton != "none") {
 693                         ifstream in;
 694                         m->openInputFile(singleton, in);
 695
 696                         string firstCol, secondCol;
 697                         listSingle = new ListVector();
 698                         while (!in.eof()) {
 699                                 in >> firstCol >> secondCol; m->gobble(in);
 700                                 listSingle->push_back(secondCol);
 701                         }
 702                         in.close();
 703                         remove(singleton.c_str());
 704
 705                         numSingleBins = listSingle->getNumBins();
 706                 }else{  listSingle = NULL; numSingleBins = 0;  }
 707
 708                 //go through users set and make them floats so we can sort them
 709                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 710                         float temp = -10.0;
 711
 712                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 713                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 714
 715                         if (temp <= cutoff) {
 716                                 orderFloat.push_back(temp);
 717                                 labelBin[temp] = numSingleBins; //initialize numbins
 718                         }
 719                 }
 720
 721                 //sort order
 722                 sort(orderFloat.begin(), orderFloat.end());
 723                 userLabels.clear();
 724
 725                 //get the list info from each file
 726                 for (int k = 0; k < listNames.size(); k++) {
 727
 728                         if (m->control_pressed) {
 729                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 730                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 731                                 return labelBin;
 732                         }
 733
 734                         InputData* input = new InputData(listNames[k], "list");
 735                         ListVector* list = input->getListVector();
 736                         string lastLabel = list->getLabel();
 737
 738                         string filledInList = listNames[k] + "filledInTemp";
 739                         ofstream outFilled;
 740                         m->openOutputFile(filledInList, outFilled);
 741
 742                         //for each label needed
 743                         for(int l = 0; l < orderFloat.size(); l++){
 744
 745                                 string thisLabel;
 746                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 747                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 748
 749                                 //this file has reached the end
 750                                 if (list == NULL) {
 751                                         list = input->getListVector(lastLabel, true);
 752                                 }else{  //do you have the distance, or do you need to fill in
 753
 754                                         float labelFloat;
 755                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 756                                         else { convert(list->getLabel(), labelFloat); }
 757
 758                                         //check for missing labels
 759                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 760                                                 //if its bigger get last label, otherwise keep it
 761                                                 delete list;
 762                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 763                                         }
 764                                         lastLabel = list->getLabel();
 765                                 }
 766
 767                                 //print to new file
 768                                 list->setLabel(thisLabel);
 769                                 list->print(outFilled);
 770
 771                                 //update labelBin
 772                                 labelBin[orderFloat[l]] += list->getNumBins();
 773
 774                                 delete list;
 775
 776                                 list = input->getListVector();
 777                         }
 778
 779                         if (list != NULL) { delete list; }
 780                         delete input;
 781
 782                         outFilled.close();
 783                         remove(listNames[k].c_str());
 784                         rename(filledInList.c_str(), listNames[k].c_str());
 785                 }
 786
 787                 return labelBin;
 788         }
 789         catch(exception& e) {
 790                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 791                 exit(1);
 792         }
 793 }
 794 //**********************************************************************************************************************
 795 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 796         try {
 797                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 798                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 799
 800                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 801                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 802                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 803
 804                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 805                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 806                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 807
 808                 map<float, int>::iterator itLabel;
 809
 810                 //for each label needed
 811                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 812
 813                         string thisLabel;
 814                         if (itLabel->first == -1) { thisLabel = "unique"; }
 815                         else { thisLabel = toString(itLabel->first,  length-1);  }
 816
 817                         outList << thisLabel << '\t' << itLabel->second << '\t';
 818
 819                         RAbundVector* rabund = new RAbundVector();
 820                         rabund->setLabel(thisLabel);
 821
 822                         //add in singletons
 823                         if (listSingle != NULL) {
 824                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 825                                         outList << listSingle->get(j) << '\t';
 826                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 827                                 }
 828                         }
 829
 830                         //get the list info from each file
 831                         for (int k = 0; k < listNames.size(); k++) {
 832
 833                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 834
 835                                 InputData* input = new InputData(listNames[k], "list");
 836                                 ListVector* list = input->getListVector(thisLabel);
 837
 838                                 //this file has reached the end
 839                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 840                                 else {
 841                                         for (int j = 0; j < list->getNumBins(); j++) {
 842                                                 outList << list->get(j) << '\t';
 843                                                 rabund->push_back(m->getNumNames(list->get(j)));
 844                                         }
 845                                         delete list;
 846                                 }
 847                                 delete input;
 848                         }
 849
 850                         SAbundVector sabund = rabund->getSAbundVector();
 851
 852                         sabund.print(outSabund);
 853                         rabund->print(outRabund);
 854                         outList << endl;
 855
 856                         delete rabund;
 857                 }
 858
 859                 outList.close();
 860                 outRabund.close();
 861                 outSabund.close();
 862
 863                 if (listSingle != NULL) { delete listSingle;  }
 864
 865                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 866
 867                 return 0;
 868         }
 869         catch(exception& e) {
 870                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 871                 exit(1);
 872         }
 873 }
 874
 875 //**********************************************************************************************************************
 876
 877 void ClusterSplitCommand::printData(ListVector* oldList){
 878         try {
 879                 string label = oldList->getLabel();
 880                 RAbundVector oldRAbund = oldList->getRAbundVector();
 881
 882                 oldRAbund.setLabel(label);
 883                 if (m->isTrue(showabund)) {
 884                         oldRAbund.getSAbundVector().print(cout);
 885                 }
 886                 oldRAbund.print(outRabund);
 887                 oldRAbund.getSAbundVector().print(outSabund);
 888
 889                 oldList->print(outList);
 890         }
 891         catch(exception& e) {
 892                 m->errorOut(e, "ClusterSplitCommand", "printData");
 893                 exit(1);
 894         }
 895 }
 896 //**********************************************************************************************************************
 897 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 898         try {
 899
 900         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 901                 int process = 0;
 902                 int exitCommand = 1;
 903                 processIDS.clear();
 904
 905                 //loop through and create all the processes you want
 906                 while (process != processors) {
 907                         int pid = fork();
 908
 909                         if (pid > 0) {
 910                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 911                                 process++;
 912                         }else if (pid == 0){
 913                                 set<string> labels;
 914                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 915
 916                                 //write out names to file
 917                                 string filename = toString(getpid()) + ".temp";
 918                                 ofstream out;
 919                                 m->openOutputFile(filename, out);
 920                                 out << tag << endl;
 921                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 922                                 out.close();
 923
 924                                 //print out labels
 925                                 ofstream outLabels;
 926                                 filename = toString(getpid()) + ".temp.labels";
 927                                 m->openOutputFile(filename, outLabels);
 928
 929                                 outLabels << cutoff << endl;
 930                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 931                                         outLabels << (*it) << endl;
 932                                 }
 933                                 outLabels.close();
 934
 935                                 exit(0);
 936                         }else {
 937                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 938                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 939                                 exit(0);
 940                         }
 941                 }
 942
 943                 //force parent to wait until all the processes are done
 944                 for (int i=0;i<processors;i++) {
 945                         int temp = processIDS[i];
 946                         wait(&temp);
 947                 }
 948
 949                 return exitCommand;
 950         #endif
 951
 952         }
 953         catch(exception& e) {
 954                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 955                 exit(1);
 956         }
 957 }
 958 //**********************************************************************************************************************
 959
 960 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 961         try {
 962                 Cluster* cluster;
 963                 SparseMatrix* matrix;
 964                 ListVector* list;
 965                 ListVector oldList;
 966                 RAbundVector* rabund;
 967
 968                 vector<string> listFileNames;
 969
 970                 double smallestCutoff = cutoff;
 971
 972                 //cluster each distance file
 973                 for (int i = 0; i < distNames.size(); i++) {
 974                         if (m->control_pressed) { return listFileNames; }
 975
 976                         string thisNamefile = distNames[i].begin()->second;
 977                         string thisDistFile = distNames[i].begin()->first;
 978
 979                         #ifdef USE_MPI
 980                                 int pid;
 981                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 982
 983                                 //output your files too
 984                                 if (pid != 0) {
 985                                         cout << endl << "Reading " << thisDistFile << endl;
 986                                 }
 987                         #endif
 988
 989                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 990
 991                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 992                         read->setCutoff(cutoff);
 993
 994                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 995                         nameMap->readMap();
 996                         read->read(nameMap);
 997
 998                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
 999
1000                         list = read->getListVector();
1001                         oldList = *list;
1002                         matrix = read->getMatrix();
1003
1004                         delete read;
1005                         delete nameMap;
1006
1007
1008                         #ifdef USE_MPI
1009                                 //output your files too
1010                                 if (pid != 0) {
1011                                         cout << endl << "Clustering " << thisDistFile << endl;
1012                                 }
1013                         #endif
1014
1015                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1016
1017                         rabund = new RAbundVector(list->getRAbundVector());
1018
1019                         //create cluster
1020                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1021                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1022                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1023                         tag = cluster->getTag();
1024
1025                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1026                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1027
1028                         ofstream listFile;
1029                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1030
1031                         listFileNames.push_back(fileroot+ tag + ".list");
1032
1033                         float previousDist = 0.00000;
1034                         float rndPreviousDist = 0.00000;
1035
1036                         oldList = *list;
1037
1038                         print_start = true;
1039                         start = time(NULL);
1040                         double saveCutoff = cutoff;
1041
1042                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1043
1044                                 if (m->control_pressed) { //clean up
1045                                         delete matrix; delete list;     delete cluster; delete rabund;
1046                                         listFile.close();
1047                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1048                                         listFileNames.clear(); return listFileNames;
1049                                 }
1050
1051                                 cluster->update(saveCutoff);
1052
1053                                 float dist = matrix->getSmallDist();
1054                                 float rndDist;
1055                                 if (hard) {
1056                                         rndDist = m->ceilDist(dist, precision);
1057                                 }else{
1058                                         rndDist = m->roundDist(dist, precision);
1059                                 }
1060
1061                                 if(previousDist <= 0.0000 && dist != previousDist){
1062                                         oldList.setLabel("unique");
1063                                         oldList.print(listFile);
1064                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1065                                 }
1066                                 else if(rndDist != rndPreviousDist){
1067                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1068                                         oldList.print(listFile);
1069                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1070                                 }
1071
1072                                 previousDist = dist;
1073                                 rndPreviousDist = rndDist;
1074                                 oldList = *list;
1075                         }
1076
1077
1078                         if(previousDist <= 0.0000){
1079                                 oldList.setLabel("unique");
1080                                 oldList.print(listFile);
1081                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1082                         }
1083                         else if(rndPreviousDist<cutoff){
1084                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1085                                 oldList.print(listFile);
1086                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1087                         }
1088
1089                         delete matrix; delete list;     delete cluster; delete rabund;
1090                         listFile.close();
1091
1092                         if (m->control_pressed) { //clean up
1093                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1094                                 listFileNames.clear(); return listFileNames;
1095                         }
1096
1097                         remove(thisDistFile.c_str());
1098                         remove(thisNamefile.c_str());
1099
1100                         if (saveCutoff != cutoff) {
1101                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1102                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1103
1104                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1105                         }
1106
1107                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1108                 }
1109
1110                 cutoff = smallestCutoff;
1111
1112                 return listFileNames;
1113
1114         }
1115         catch(exception& e) {
1116                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1117                 exit(1);
1118         }
1119
1120
1121 }
1122 //**********************************************************************************************************************
1123
1124 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1125         try{
1126
1127 #ifdef USE_MPI
1128                 int pid;
1129                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1130
1131                 if (pid != 0) {
1132 #endif
1133
1134                 string thisOutputDir = outputDir;
1135                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1136                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1137                 remove(outputFileName.c_str());
1138
1139
1140                 for (int i = 0; i < distNames.size(); i++) {
1141                         if (m->control_pressed) {  return 0; }
1142
1143                         string thisDistFile = distNames[i].begin()->first;
1144
1145                         m->appendFiles(thisDistFile, outputFileName);
1146                 }
1147
1148                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1149
1150 #ifdef USE_MPI
1151                 }
1152 #endif
1153
1154                 return 0;
1155
1156
1157         }
1158         catch(exception& e) {
1159                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1160                 exit(1);
1161         }
1162 }
1163 //**********************************************************************************************************************