clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::setParameters(){
  21         try {
  22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
  23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
  24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
  25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
  26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
  27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
  28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
  29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
  30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
  31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
  32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
  33                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
  34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
  35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
  36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
  37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
  38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
  39
  40                 vector<string> myArray;
  41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
  42                 return myArray;
  43         }
  44         catch(exception& e) {
  45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
  46                 exit(1);
  47         }
  48 }
  49 //**********************************************************************************************************************
  50 string ClusterSplitCommand::getHelpString(){
  51         try {
  52                 string helpString = "";
  53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
  54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
  55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
  56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
  57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
  58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
  59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
  60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
  61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
  62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
  63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
  64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
  65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
  66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
  67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
  68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
  69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
  70 #ifdef USE_MPI
  71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  72 #endif
  73                 helpString += "The cluster.split command should be in the following format: \n";
  74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
  75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";
  76                 return helpString;
  77         }
  78         catch(exception& e) {
  79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
  80                 exit(1);
  81         }
  82 }
  83 //**********************************************************************************************************************
  84 ClusterSplitCommand::ClusterSplitCommand(){
  85         try {
  86                 abort = true; calledHelp = true;
  87                 setParameters();
  88                 vector<string> tempOutNames;
  89                 outputTypes["list"] = tempOutNames;
  90                 outputTypes["rabund"] = tempOutNames;
  91                 outputTypes["sabund"] = tempOutNames;
  92                 outputTypes["column"] = tempOutNames;
  93         }
  94         catch(exception& e) {
  95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  96                 exit(1);
  97         }
  98 }
  99 //**********************************************************************************************************************
 100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
 101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
 102         try{
 103                 abort = false; calledHelp = false;
 104                 format = "";
 105
 106                 //allow user to run help
 107                 if(option == "help") { help(); abort = true; calledHelp = true; }
 108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
 109
 110                 else {
 111                         vector<string> myArray = setParameters();
 112
 113                         OptionParser parser(option);
 114                         map<string,string> parameters = parser.getParameters();
 115
 116                         ValidParameters validParameter("cluster.split");
 117
 118                         //check to make sure all parameters are valid for command
 119                         map<string,string>::iterator it;
 120                         for (it = parameters.begin(); it != parameters.end(); it++) {
 121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
 122                                         abort = true;
 123                                 }
 124                         }
 125
 126                         //initialize outputTypes
 127                         vector<string> tempOutNames;
 128                         outputTypes["list"] = tempOutNames;
 129                         outputTypes["rabund"] = tempOutNames;
 130                         outputTypes["sabund"] = tempOutNames;
 131                         outputTypes["column"] = tempOutNames;
 132
 133                         //if the user changes the output directory command factory will send this info to us in the output parameter
 134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 135
 136                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 138                         if (inputDir == "not found"){   inputDir = "";          }
 139                         else {
 140                                 string path;
 141                                 it = parameters.find("phylip");
 142                                 //user has given a template file
 143                                 if(it != parameters.end()){
 144                                         path = m->hasPath(it->second);
 145                                         //if the user has not given a path then, add inputdir. else leave path alone.
 146                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 147                                 }
 148
 149                                 it = parameters.find("column");
 150                                 //user has given a template file
 151                                 if(it != parameters.end()){
 152                                         path = m->hasPath(it->second);
 153                                         //if the user has not given a path then, add inputdir. else leave path alone.
 154                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 155                                 }
 156
 157                                 it = parameters.find("name");
 158                                 //user has given a template file
 159                                 if(it != parameters.end()){
 160                                         path = m->hasPath(it->second);
 161                                         //if the user has not given a path then, add inputdir. else leave path alone.
 162                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 163                                 }
 164
 165                                 it = parameters.find("taxonomy");
 166                                 //user has given a template file
 167                                 if(it != parameters.end()){
 168                                         path = m->hasPath(it->second);
 169                                         //if the user has not given a path then, add inputdir. else leave path alone.
 170                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 171                                 }
 172
 173                                 it = parameters.find("fasta");
 174                                 //user has given a template file
 175                                 if(it != parameters.end()){
 176                                         path = m->hasPath(it->second);
 177                                         //if the user has not given a path then, add inputdir. else leave path alone.
 178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 179                                 }
 180                         }
 181
 182                         //check for required parameters
 183                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 184                         if (phylipfile == "not open") { abort = true; }
 185                         else if (phylipfile == "not found") { phylipfile = ""; }
 186                         else {  distfile = phylipfile;  format = "phylip";      }
 187
 188                         columnfile = validParameter.validFile(parameters, "column", true);
 189                         if (columnfile == "not open") { abort = true; }
 190                         else if (columnfile == "not found") { columnfile = ""; }
 191                         else {  distfile = columnfile; format = "column";       }
 192
 193                         namefile = validParameter.validFile(parameters, "name", true);
 194                         if (namefile == "not open") { abort = true; }
 195                         else if (namefile == "not found") { namefile = ""; }
 196
 197                         fastafile = validParameter.validFile(parameters, "fasta", true);
 198                         if (fastafile == "not open") { abort = true; }
 199                         else if (fastafile == "not found") { fastafile = ""; }
 200                         else { distfile = fastafile;  splitmethod = "fasta";  }
 201
 202                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 203                         if (taxFile == "not open") { abort = true; }
 204                         else if (taxFile == "not found") { taxFile = ""; }
 205
 206                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) {
 207                                 //is there are current file available for either of these?
 208                                 //give priority to column, then phylip, then fasta
 209                                 columnfile = m->getColumnFile();
 210                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
 211                                 else {
 212                                         phylipfile = m->getPhylipFile();
 213                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
 214                                         else {
 215                                                 fastafile = m->getFastaFile();
 216                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
 217                                                 else {
 218                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine();
 219                                                         abort = true;
 220                                                 }
 221                                         }
 222                                 }
 223                         }
 224                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 225
 226                         if (columnfile != "") {
 227                                 if (namefile == "") {
 228                                         namefile = m->getNameFile();
 229                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 230                                         else {
 231                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
 232                                                 abort = true;
 233                                         }
 234                                 }
 235                         }
 236
 237                         if (fastafile != "") {
 238                                 if (taxFile == "") {
 239                                         taxFile = m->getTaxonomyFile();
 240                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
 241                                         else {
 242                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 243                                                 abort = true;
 244                                         }
 245                                 }
 246
 247                                 if (namefile == "") {
 248                                         namefile = m->getNameFile();
 249                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 250                                         else {
 251                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 252                                                 abort = true;
 253                                         }
 254                                 }
 255                         }
 256
 257                         //check for optional parameter and set defaults
 258                         // ...at some point should added some additional type checking...
 259                         //get user cutoff and precision or use defaults
 260                         string temp;
 261                         temp = validParameter.validFile(parameters, "precision", false);
 262                         if (temp == "not found") { temp = "100"; }
 263                         //saves precision legnth for formatting below
 264                         length = temp.length();
 265                         convert(temp, precision);
 266
 267                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
 268                         hard = m->isTrue(temp);
 269
 270                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 271                         large = m->isTrue(temp);
 272
 273                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
 274                         m->setProcessors(temp);
 275                         convert(temp, processors);
 276
 277                         temp = validParameter.validFile(parameters, "splitmethod", false);
 278                         if (splitmethod != "fasta") {
 279                                 if (temp == "not found")  { splitmethod = "distance"; }
 280                                 else {  splitmethod = temp; }
 281                         }
 282
 283                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 284                         convert(temp, cutoff);
 285                         cutoff += (5 / (precision * 10.0));
 286
 287                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 288                         convert(temp, taxLevelCutoff);
 289
 290                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
 291
 292                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 293                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 294
 295                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 296                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 297
 298                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 299
 300                         showabund = validParameter.validFile(parameters, "showabund", false);
 301                         if (showabund == "not found") { showabund = "T"; }
 302
 303                         timing = validParameter.validFile(parameters, "timing", false);
 304                         if (timing == "not found") { timing = "F"; }
 305
 306                 }
 307         }
 308         catch(exception& e) {
 309                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 310                 exit(1);
 311         }
 312 }
 313
 314 //**********************************************************************************************************************
 315
 316 int ClusterSplitCommand::execute(){
 317         try {
 318
 319                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 320
 321                 time_t estart;
 322                 vector<string> listFileNames;
 323                 set<string> labels;
 324                 string singletonName = "";
 325                 double saveCutoff = cutoff;
 326
 327                 //****************** file prep work ******************************//
 328                 #ifdef USE_MPI
 329                         int pid;
 330                         int tag = 2001;
 331                         MPI_Status status;
 332                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 333                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 334
 335                         if (pid == 0) { //only process 0 converts and splits
 336
 337                 #endif
 338
 339                 //if user gave a phylip file convert to column file
 340                 if (format == "phylip") {
 341                         estart = time(NULL);
 342                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 343
 344                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 345
 346                         NameAssignment* nameMap = NULL;
 347                         convert->setFormat("phylip");
 348                         convert->read(nameMap);
 349
 350                         if (m->control_pressed) {  delete convert;  return 0;  }
 351
 352                         distfile = convert->getOutputFile();
 353
 354                         //if no names file given with phylip file, create it
 355                         ListVector* listToMakeNameFile =  convert->getListVector();
 356                         if (namefile == "") {  //you need to make a namefile for split matrix
 357                                 ofstream out;
 358                                 namefile = phylipfile + ".names";
 359                                 m->openOutputFile(namefile, out);
 360                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 361                                         string bin = listToMakeNameFile->get(i);
 362                                         out << bin << '\t' << bin << endl;
 363                                 }
 364                                 out.close();
 365                         }
 366                         delete listToMakeNameFile;
 367                         delete convert;
 368
 369                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 370                 }
 371                 if (m->control_pressed) { return 0; }
 372
 373                 estart = time(NULL);
 374                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 375
 376                 //split matrix into non-overlapping groups
 377                 SplitMatrix* split;
 378                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 379                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 380                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 381                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 382
 383                 split->split();
 384
 385                 if (m->control_pressed) { delete split; return 0; }
 386
 387                 singletonName = split->getSingletonNames();
 388                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 389                 delete split;
 390
 391                 //output a merged distance file
 392                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 393
 394
 395                 if (m->control_pressed) { return 0; }
 396
 397                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 398                 estart = time(NULL);
 399
 400                 //****************** break up files between processes and cluster each file set ******************************//
 401         #ifdef USE_MPI
 402                         ////you are process 0 from above////
 403
 404                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 405                         dividedNames.resize(processors);
 406
 407                         //for each file group figure out which process will complete it
 408                         //want to divide the load intelligently so the big files are spread between processes
 409                         for (int i = 0; i < distName.size(); i++) {
 410                                 int processToAssign = (i+1) % processors;
 411                                 if (processToAssign == 0) { processToAssign = processors; }
 412
 413                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 414                         }
 415
 416                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 417                         for (int i = 0; i < processors; i++) {
 418                                 int remainder = ((i+1) % processors);
 419                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 420                         }
 421
 422
 423                         //send each child the list of files it needs to process
 424                         for(int i = 1; i < processors; i++) {
 425                                 //send number of file pairs
 426                                 int num = dividedNames[i].size();
 427                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 428
 429                                 for (int j = 0; j < num; j++) { //send filenames to process i
 430                                         char tempDistFileName[1024];
 431                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 432                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 433
 434                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 435                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 436
 437                                         char tempNameFileName[1024];
 438                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 439                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 440
 441                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 442                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 443                                 }
 444                         }
 445
 446                         //process your share
 447                         listFileNames = cluster(dividedNames[0], labels);
 448
 449                         //receive the other processes info
 450                         for(int i = 1; i < processors; i++) {
 451                                 int num = dividedNames[i].size();
 452
 453                                 double tempCutoff;
 454                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 455                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 456
 457                                 //send list filenames to root process
 458                                 for (int j = 0; j < num; j++) {
 459                                         int lengthList = 0;
 460                                         char tempListFileName[1024];
 461
 462                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 463                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 464
 465                                         string myListFileName = tempListFileName;
 466                                         myListFileName = myListFileName.substr(0, lengthList);
 467
 468                                         listFileNames.push_back(myListFileName);
 469                                 }
 470
 471                                 //send Labels to root process
 472                                 int numLabels = 0;
 473                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 474
 475                                 for (int j = 0; j < numLabels; j++) {
 476                                         int lengthLabel = 0;
 477                                         char tempLabel[100];
 478
 479                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 480                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 481
 482                                         string myLabel = tempLabel;
 483                                         myLabel = myLabel.substr(0, lengthLabel);
 484
 485                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 486                                 }
 487                         }
 488
 489                 }else { //you are a child process
 490                         vector < map<string, string> >  myNames;
 491
 492                         //recieve the files you need to process
 493                         //receive number of file pairs
 494                         int num = 0;
 495                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 496
 497                         myNames.resize(num);
 498
 499                         for (int j = 0; j < num; j++) { //receive filenames to process
 500                                 int lengthDist = 0;
 501                                 char tempDistFileName[1024];
 502
 503                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 504                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 505
 506                                 string myDistFileName = tempDistFileName;
 507                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 508
 509                                 int lengthName = 0;
 510                                 char tempNameFileName[1024];
 511
 512                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 513                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 514
 515                                 string myNameFileName = tempNameFileName;
 516                                 myNameFileName = myNameFileName.substr(0, lengthName);
 517
 518                                 //save file name
 519                                 myNames[j][myDistFileName] = myNameFileName;
 520                         }
 521
 522                         //process them
 523                         listFileNames = cluster(myNames, labels);
 524
 525                         //send cutoff
 526                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 527
 528                         //send list filenames to root process
 529                         for (int j = 0; j < num; j++) {
 530                                 char tempListFileName[1024];
 531                                 strcpy(tempListFileName, listFileNames[j].c_str());
 532                                 int lengthList = listFileNames[j].length();
 533
 534                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 535                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 536                         }
 537
 538                         //send Labels to root process
 539                         int numLabels = labels.size();
 540                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 541
 542                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 543                                 char tempLabel[100];
 544                                 strcpy(tempLabel, (*it).c_str());
 545                                 int lengthLabel = (*it).length();
 546
 547                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 548                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 549                         }
 550                 }
 551
 552                 //make everyone wait
 553                 MPI_Barrier(MPI_COMM_WORLD);
 554
 555         #else
 556
 557                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 558                                 if(processors == 1){
 559                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 560                                 }else{
 561                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 562                                         dividedNames.resize(processors);
 563
 564                                         //for each file group figure out which process will complete it
 565                                         //want to divide the load intelligently so the big files are spread between processes
 566                                         for (int i = 0; i < distName.size(); i++) {
 567                                                 int processToAssign = (i+1) % processors;
 568                                                 if (processToAssign == 0) { processToAssign = processors; }
 569
 570                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 571                                         }
 572
 573                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 574                                         for (int i = 0; i < processors; i++) {
 575                                                 int remainder = ((i+1) % processors);
 576                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 577                                         }
 578
 579                                         createProcesses(dividedNames);
 580
 581                                         if (m->control_pressed) { return 0; }
 582
 583                                         //get list of list file names from each process
 584                                         for(int i=0;i<processors;i++){
 585                                                 string filename = toString(processIDS[i]) + ".temp";
 586                                                 ifstream in;
 587                                                 m->openInputFile(filename, in);
 588
 589                                                 in >> tag; m->gobble(in);
 590
 591                                                 while(!in.eof()) {
 592                                                         string tempName;
 593                                                         in >> tempName; m->gobble(in);
 594                                                         listFileNames.push_back(tempName);
 595                                                 }
 596                                                 in.close();
 597                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 598
 599                                                 //get labels
 600                                                 filename = toString(processIDS[i]) + ".temp.labels";
 601                                                 ifstream in2;
 602                                                 m->openInputFile(filename, in2);
 603
 604                                                 float tempCutoff;
 605                                                 in2 >> tempCutoff; m->gobble(in2);
 606                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 607
 608                                                 while(!in2.eof()) {
 609                                                         string tempName;
 610                                                         in2 >> tempName; m->gobble(in2);
 611                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 612                                                 }
 613                                                 in2.close();
 614                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 615                                         }
 616                                 }
 617                 #else
 618                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 619                 #endif
 620         #endif
 621                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 622
 623                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 624
 625                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 626
 627                 //****************** merge list file and create rabund and sabund files ******************************//
 628                 estart = time(NULL);
 629                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 630
 631                 #ifdef USE_MPI
 632                         if (pid == 0) { //only process 0 merges
 633                 #endif
 634
 635                 ListVector* listSingle;
 636                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 637
 638                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 639
 640                 mergeLists(listFileNames, labelBins, listSingle);
 641
 642                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 643
 644                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 645
 646                 //set list file as new current listfile
 647                 string current = "";
 648                 itTypes = outputTypes.find("list");
 649                 if (itTypes != outputTypes.end()) {
 650                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
 651                 }
 652
 653                 //set rabund file as new current rabundfile
 654                 itTypes = outputTypes.find("rabund");
 655                 if (itTypes != outputTypes.end()) {
 656                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
 657                 }
 658
 659                 //set sabund file as new current sabundfile
 660                 itTypes = outputTypes.find("sabund");
 661                 if (itTypes != outputTypes.end()) {
 662                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
 663                 }
 664
 665                 m->mothurOutEndLine();
 666                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 667                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 668                 m->mothurOutEndLine();
 669
 670                 #ifdef USE_MPI
 671                         } //only process 0 merges
 672
 673                         //make everyone wait
 674                         MPI_Barrier(MPI_COMM_WORLD);
 675                 #endif
 676
 677                 return 0;
 678         }
 679         catch(exception& e) {
 680                 m->errorOut(e, "ClusterSplitCommand", "execute");
 681                 exit(1);
 682         }
 683 }
 684 //**********************************************************************************************************************
 685 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 686         try {
 687
 688                 map<float, int> labelBin;
 689                 vector<float> orderFloat;
 690                 int numSingleBins;
 691
 692                 //read in singletons
 693                 if (singleton != "none") {
 694                         ifstream in;
 695                         m->openInputFile(singleton, in);
 696
 697                         string firstCol, secondCol;
 698                         listSingle = new ListVector();
 699                         while (!in.eof()) {
 700                                 in >> firstCol >> secondCol; m->gobble(in);
 701                                 listSingle->push_back(secondCol);
 702                         }
 703                         in.close();
 704                         remove(singleton.c_str());
 705
 706                         numSingleBins = listSingle->getNumBins();
 707                 }else{  listSingle = NULL; numSingleBins = 0;  }
 708
 709                 //go through users set and make them floats so we can sort them
 710                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 711                         float temp = -10.0;
 712
 713                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 714                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 715
 716                         if (temp <= cutoff) {
 717                                 orderFloat.push_back(temp);
 718                                 labelBin[temp] = numSingleBins; //initialize numbins
 719                         }
 720                 }
 721
 722                 //sort order
 723                 sort(orderFloat.begin(), orderFloat.end());
 724                 userLabels.clear();
 725
 726                 //get the list info from each file
 727                 for (int k = 0; k < listNames.size(); k++) {
 728
 729                         if (m->control_pressed) {
 730                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 731                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 732                                 return labelBin;
 733                         }
 734
 735                         InputData* input = new InputData(listNames[k], "list");
 736                         ListVector* list = input->getListVector();
 737                         string lastLabel = list->getLabel();
 738
 739                         string filledInList = listNames[k] + "filledInTemp";
 740                         ofstream outFilled;
 741                         m->openOutputFile(filledInList, outFilled);
 742
 743                         //for each label needed
 744                         for(int l = 0; l < orderFloat.size(); l++){
 745
 746                                 string thisLabel;
 747                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 748                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 749
 750                                 //this file has reached the end
 751                                 if (list == NULL) {
 752                                         list = input->getListVector(lastLabel, true);
 753                                 }else{  //do you have the distance, or do you need to fill in
 754
 755                                         float labelFloat;
 756                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 757                                         else { convert(list->getLabel(), labelFloat); }
 758
 759                                         //check for missing labels
 760                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 761                                                 //if its bigger get last label, otherwise keep it
 762                                                 delete list;
 763                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 764                                         }
 765                                         lastLabel = list->getLabel();
 766                                 }
 767
 768                                 //print to new file
 769                                 list->setLabel(thisLabel);
 770                                 list->print(outFilled);
 771
 772                                 //update labelBin
 773                                 labelBin[orderFloat[l]] += list->getNumBins();
 774
 775                                 delete list;
 776
 777                                 list = input->getListVector();
 778                         }
 779
 780                         if (list != NULL) { delete list; }
 781                         delete input;
 782
 783                         outFilled.close();
 784                         remove(listNames[k].c_str());
 785                         rename(filledInList.c_str(), listNames[k].c_str());
 786                 }
 787
 788                 return labelBin;
 789         }
 790         catch(exception& e) {
 791                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 792                 exit(1);
 793         }
 794 }
 795 //**********************************************************************************************************************
 796 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 797         try {
 798                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 799                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 800
 801                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 802                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 803                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 804
 805                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 806                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 807                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 808
 809                 map<float, int>::iterator itLabel;
 810
 811                 //for each label needed
 812                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 813
 814                         string thisLabel;
 815                         if (itLabel->first == -1) { thisLabel = "unique"; }
 816                         else { thisLabel = toString(itLabel->first,  length-1);  }
 817
 818                         outList << thisLabel << '\t' << itLabel->second << '\t';
 819
 820                         RAbundVector* rabund = new RAbundVector();
 821                         rabund->setLabel(thisLabel);
 822
 823                         //add in singletons
 824                         if (listSingle != NULL) {
 825                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 826                                         outList << listSingle->get(j) << '\t';
 827                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 828                                 }
 829                         }
 830
 831                         //get the list info from each file
 832                         for (int k = 0; k < listNames.size(); k++) {
 833
 834                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 835
 836                                 InputData* input = new InputData(listNames[k], "list");
 837                                 ListVector* list = input->getListVector(thisLabel);
 838
 839                                 //this file has reached the end
 840                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 841                                 else {
 842                                         for (int j = 0; j < list->getNumBins(); j++) {
 843                                                 outList << list->get(j) << '\t';
 844                                                 rabund->push_back(m->getNumNames(list->get(j)));
 845                                         }
 846                                         delete list;
 847                                 }
 848                                 delete input;
 849                         }
 850
 851                         SAbundVector sabund = rabund->getSAbundVector();
 852
 853                         sabund.print(outSabund);
 854                         rabund->print(outRabund);
 855                         outList << endl;
 856
 857                         delete rabund;
 858                 }
 859
 860                 outList.close();
 861                 outRabund.close();
 862                 outSabund.close();
 863
 864                 if (listSingle != NULL) { delete listSingle;  }
 865
 866                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 867
 868                 return 0;
 869         }
 870         catch(exception& e) {
 871                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 872                 exit(1);
 873         }
 874 }
 875
 876 //**********************************************************************************************************************
 877
 878 void ClusterSplitCommand::printData(ListVector* oldList){
 879         try {
 880                 string label = oldList->getLabel();
 881                 RAbundVector oldRAbund = oldList->getRAbundVector();
 882
 883                 oldRAbund.setLabel(label);
 884                 if (m->isTrue(showabund)) {
 885                         oldRAbund.getSAbundVector().print(cout);
 886                 }
 887                 oldRAbund.print(outRabund);
 888                 oldRAbund.getSAbundVector().print(outSabund);
 889
 890                 oldList->print(outList);
 891         }
 892         catch(exception& e) {
 893                 m->errorOut(e, "ClusterSplitCommand", "printData");
 894                 exit(1);
 895         }
 896 }
 897 //**********************************************************************************************************************
 898 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 899         try {
 900
 901         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 902                 int process = 0;
 903                 int exitCommand = 1;
 904                 processIDS.clear();
 905
 906                 //loop through and create all the processes you want
 907                 while (process != processors) {
 908                         int pid = fork();
 909
 910                         if (pid > 0) {
 911                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 912                                 process++;
 913                         }else if (pid == 0){
 914                                 set<string> labels;
 915                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 916
 917                                 //write out names to file
 918                                 string filename = toString(getpid()) + ".temp";
 919                                 ofstream out;
 920                                 m->openOutputFile(filename, out);
 921                                 out << tag << endl;
 922                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 923                                 out.close();
 924
 925                                 //print out labels
 926                                 ofstream outLabels;
 927                                 filename = toString(getpid()) + ".temp.labels";
 928                                 m->openOutputFile(filename, outLabels);
 929
 930                                 outLabels << cutoff << endl;
 931                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 932                                         outLabels << (*it) << endl;
 933                                 }
 934                                 outLabels.close();
 935
 936                                 exit(0);
 937                         }else {
 938                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 939                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 940                                 exit(0);
 941                         }
 942                 }
 943
 944                 //force parent to wait until all the processes are done
 945                 for (int i=0;i<processors;i++) {
 946                         int temp = processIDS[i];
 947                         wait(&temp);
 948                 }
 949
 950                 return exitCommand;
 951         #endif
 952
 953         }
 954         catch(exception& e) {
 955                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 956                 exit(1);
 957         }
 958 }
 959 //**********************************************************************************************************************
 960
 961 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 962         try {
 963                 Cluster* cluster;
 964                 SparseMatrix* matrix;
 965                 ListVector* list;
 966                 ListVector oldList;
 967                 RAbundVector* rabund;
 968
 969                 vector<string> listFileNames;
 970
 971                 double smallestCutoff = cutoff;
 972
 973                 //cluster each distance file
 974                 for (int i = 0; i < distNames.size(); i++) {
 975                         if (m->control_pressed) { return listFileNames; }
 976
 977                         string thisNamefile = distNames[i].begin()->second;
 978                         string thisDistFile = distNames[i].begin()->first;
 979
 980                         #ifdef USE_MPI
 981                                 int pid;
 982                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 983
 984                                 //output your files too
 985                                 if (pid != 0) {
 986                                         cout << endl << "Reading " << thisDistFile << endl;
 987                                 }
 988                         #endif
 989
 990                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 991
 992                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 993                         read->setCutoff(cutoff);
 994
 995                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 996                         nameMap->readMap();
 997                         read->read(nameMap);
 998
 999                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
1000
1001                         list = read->getListVector();
1002                         oldList = *list;
1003                         matrix = read->getMatrix();
1004
1005                         delete read;
1006                         delete nameMap;
1007
1008
1009                         #ifdef USE_MPI
1010                                 //output your files too
1011                                 if (pid != 0) {
1012                                         cout << endl << "Clustering " << thisDistFile << endl;
1013                                 }
1014                         #endif
1015
1016                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1017
1018                         rabund = new RAbundVector(list->getRAbundVector());
1019
1020                         //create cluster
1021                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1022                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1023                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1024                         tag = cluster->getTag();
1025
1026                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1027                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1028
1029                         ofstream listFile;
1030                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1031
1032                         listFileNames.push_back(fileroot+ tag + ".list");
1033
1034                         float previousDist = 0.00000;
1035                         float rndPreviousDist = 0.00000;
1036
1037                         oldList = *list;
1038
1039                         print_start = true;
1040                         start = time(NULL);
1041                         double saveCutoff = cutoff;
1042
1043                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1044
1045                                 if (m->control_pressed) { //clean up
1046                                         delete matrix; delete list;     delete cluster; delete rabund;
1047                                         listFile.close();
1048                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1049                                         listFileNames.clear(); return listFileNames;
1050                                 }
1051
1052                                 cluster->update(saveCutoff);
1053
1054                                 float dist = matrix->getSmallDist();
1055                                 float rndDist;
1056                                 if (hard) {
1057                                         rndDist = m->ceilDist(dist, precision);
1058                                 }else{
1059                                         rndDist = m->roundDist(dist, precision);
1060                                 }
1061
1062                                 if(previousDist <= 0.0000 && dist != previousDist){
1063                                         oldList.setLabel("unique");
1064                                         oldList.print(listFile);
1065                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1066                                 }
1067                                 else if(rndDist != rndPreviousDist){
1068                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1069                                         oldList.print(listFile);
1070                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1071                                 }
1072
1073                                 previousDist = dist;
1074                                 rndPreviousDist = rndDist;
1075                                 oldList = *list;
1076                         }
1077
1078
1079                         if(previousDist <= 0.0000){
1080                                 oldList.setLabel("unique");
1081                                 oldList.print(listFile);
1082                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1083                         }
1084                         else if(rndPreviousDist<cutoff){
1085                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1086                                 oldList.print(listFile);
1087                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1088                         }
1089
1090                         delete matrix; delete list;     delete cluster; delete rabund;
1091                         listFile.close();
1092
1093                         if (m->control_pressed) { //clean up
1094                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1095                                 listFileNames.clear(); return listFileNames;
1096                         }
1097
1098                         remove(thisDistFile.c_str());
1099                         remove(thisNamefile.c_str());
1100
1101                         if (saveCutoff != cutoff) {
1102                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1103                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1104
1105                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1106                         }
1107
1108                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1109                 }
1110
1111                 cutoff = smallestCutoff;
1112
1113                 return listFileNames;
1114
1115         }
1116         catch(exception& e) {
1117                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1118                 exit(1);
1119         }
1120
1121
1122 }
1123 //**********************************************************************************************************************
1124
1125 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1126         try{
1127
1128 #ifdef USE_MPI
1129                 int pid;
1130                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1131
1132                 if (pid != 0) {
1133 #endif
1134
1135                 string thisOutputDir = outputDir;
1136                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1137                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1138                 remove(outputFileName.c_str());
1139
1140
1141                 for (int i = 0; i < distNames.size(); i++) {
1142                         if (m->control_pressed) {  return 0; }
1143
1144                         string thisDistFile = distNames[i].begin()->first;
1145
1146                         m->appendFiles(thisDistFile, outputFileName);
1147                 }
1148
1149                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1150
1151 #ifdef USE_MPI
1152                 }
1153 #endif
1154
1155                 return 0;
1156
1157
1158         }
1159         catch(exception& e) {
1160                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1161                 exit(1);
1162         }
1163 }
1164 //**********************************************************************************************************************