clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::setParameters(){
  21         try {
  22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
  23                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
  24                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
  25                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
  26                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
  27                 CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
  28                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
  29                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
  30                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
  31                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
  32                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
  33                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
  34                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
  35                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
  36                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
  37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
  38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
  39
  40                 vector<string> myArray;
  41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
  42                 return myArray;
  43         }
  44         catch(exception& e) {
  45                 m->errorOut(e, "ClusterSplitCommand", "setParameters");
  46                 exit(1);
  47         }
  48 }
  49 //**********************************************************************************************************************
  50 string ClusterSplitCommand::getHelpString(){
  51         try {
  52                 string helpString = "";
  53                 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n";
  54                 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
  55                 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
  56                 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n";
  57                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
  58                 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n";
  59                 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
  60                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
  61                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
  62                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
  63                 helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
  64                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
  65                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
  66                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
  67                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
  68                 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
  69                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
  70 #ifdef USE_MPI
  71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  72 #endif
  73                 helpString += "The cluster.split command should be in the following format: \n";
  74                 helpString += "cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
  75                 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";
  76                 return helpString;
  77         }
  78         catch(exception& e) {
  79                 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
  80                 exit(1);
  81         }
  82 }
  83 //**********************************************************************************************************************
  84 ClusterSplitCommand::ClusterSplitCommand(){
  85         try {
  86                 abort = true; calledHelp = true;
  87                 setParameters();
  88                 vector<string> tempOutNames;
  89                 outputTypes["list"] = tempOutNames;
  90                 outputTypes["rabund"] = tempOutNames;
  91                 outputTypes["sabund"] = tempOutNames;
  92                 outputTypes["column"] = tempOutNames;
  93         }
  94         catch(exception& e) {
  95                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  96                 exit(1);
  97         }
  98 }
  99 //**********************************************************************************************************************
 100 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
 101 ClusterSplitCommand::ClusterSplitCommand(string option)  {
 102         try{
 103                 abort = false; calledHelp = false;
 104                 format = "";
 105
 106                 //allow user to run help
 107                 if(option == "help") { help(); abort = true; calledHelp = true; }
 108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
 109
 110                 else {
 111                         vector<string> myArray = setParameters();
 112
 113                         OptionParser parser(option);
 114                         map<string,string> parameters = parser.getParameters();
 115
 116                         ValidParameters validParameter("cluster.split");
 117
 118                         //check to make sure all parameters are valid for command
 119                         map<string,string>::iterator it;
 120                         for (it = parameters.begin(); it != parameters.end(); it++) {
 121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
 122                                         abort = true;
 123                                 }
 124                         }
 125
 126                         //initialize outputTypes
 127                         vector<string> tempOutNames;
 128                         outputTypes["list"] = tempOutNames;
 129                         outputTypes["rabund"] = tempOutNames;
 130                         outputTypes["sabund"] = tempOutNames;
 131                         outputTypes["column"] = tempOutNames;
 132
 133                         //if the user changes the output directory command factory will send this info to us in the output parameter
 134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 135
 136                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 138                         if (inputDir == "not found"){   inputDir = "";          }
 139                         else {
 140                                 string path;
 141                                 it = parameters.find("phylip");
 142                                 //user has given a template file
 143                                 if(it != parameters.end()){
 144                                         path = m->hasPath(it->second);
 145                                         //if the user has not given a path then, add inputdir. else leave path alone.
 146                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 147                                 }
 148
 149                                 it = parameters.find("column");
 150                                 //user has given a template file
 151                                 if(it != parameters.end()){
 152                                         path = m->hasPath(it->second);
 153                                         //if the user has not given a path then, add inputdir. else leave path alone.
 154                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 155                                 }
 156
 157                                 it = parameters.find("name");
 158                                 //user has given a template file
 159                                 if(it != parameters.end()){
 160                                         path = m->hasPath(it->second);
 161                                         //if the user has not given a path then, add inputdir. else leave path alone.
 162                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 163                                 }
 164
 165                                 it = parameters.find("taxonomy");
 166                                 //user has given a template file
 167                                 if(it != parameters.end()){
 168                                         path = m->hasPath(it->second);
 169                                         //if the user has not given a path then, add inputdir. else leave path alone.
 170                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 171                                 }
 172
 173                                 it = parameters.find("fasta");
 174                                 //user has given a template file
 175                                 if(it != parameters.end()){
 176                                         path = m->hasPath(it->second);
 177                                         //if the user has not given a path then, add inputdir. else leave path alone.
 178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 179                                 }
 180                         }
 181
 182                         //check for required parameters
 183                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 184                         if (phylipfile == "not open") { abort = true; }
 185                         else if (phylipfile == "not found") { phylipfile = ""; }
 186                         else {  distfile = phylipfile;  format = "phylip";      m->setPhylipFile(phylipfile); }
 187
 188                         columnfile = validParameter.validFile(parameters, "column", true);
 189                         if (columnfile == "not open") { abort = true; }
 190                         else if (columnfile == "not found") { columnfile = ""; }
 191                         else {  distfile = columnfile; format = "column";       m->setColumnFile(columnfile); }
 192
 193                         namefile = validParameter.validFile(parameters, "name", true);
 194                         if (namefile == "not open") { abort = true; }
 195                         else if (namefile == "not found") { namefile = "";  }
 196                         else { m->setNameFile(namefile); }
 197
 198                         fastafile = validParameter.validFile(parameters, "fasta", true);
 199                         if (fastafile == "not open") { abort = true; }
 200                         else if (fastafile == "not found") { fastafile = ""; }
 201                         else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
 202
 203                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 204                         if (taxFile == "not open") { abort = true; }
 205                         else if (taxFile == "not found") { taxFile = ""; }
 206                         else {  m->setTaxonomyFile(taxFile); }
 207
 208                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) {
 209                                 //is there are current file available for either of these?
 210                                 //give priority to column, then phylip, then fasta
 211                                 columnfile = m->getColumnFile();
 212                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
 213                                 else {
 214                                         phylipfile = m->getPhylipFile();
 215                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
 216                                         else {
 217                                                 fastafile = m->getFastaFile();
 218                                                 if (fastafile != "") {  m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
 219                                                 else {
 220                                                         m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine();
 221                                                         abort = true;
 222                                                 }
 223                                         }
 224                                 }
 225                         }
 226                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 227
 228                         if (columnfile != "") {
 229                                 if (namefile == "") {
 230                                         namefile = m->getNameFile();
 231                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 232                                         else {
 233                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
 234                                                 abort = true;
 235                                         }
 236                                 }
 237                         }
 238
 239                         if (fastafile != "") {
 240                                 if (taxFile == "") {
 241                                         taxFile = m->getTaxonomyFile();
 242                                         if (taxFile != "") {  m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
 243                                         else {
 244                                                 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 245                                                 abort = true;
 246                                         }
 247                                 }
 248
 249                                 if (namefile == "") {
 250                                         namefile = m->getNameFile();
 251                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
 252                                         else {
 253                                                 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
 254                                                 abort = true;
 255                                         }
 256                                 }
 257                         }
 258
 259                         //check for optional parameter and set defaults
 260                         // ...at some point should added some additional type checking...
 261                         //get user cutoff and precision or use defaults
 262                         string temp;
 263                         temp = validParameter.validFile(parameters, "precision", false);
 264                         if (temp == "not found") { temp = "100"; }
 265                         //saves precision legnth for formatting below
 266                         length = temp.length();
 267                         convert(temp, precision);
 268
 269                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
 270                         hard = m->isTrue(temp);
 271
 272                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 273                         large = m->isTrue(temp);
 274
 275                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
 276                         m->setProcessors(temp);
 277                         convert(temp, processors);
 278
 279                         temp = validParameter.validFile(parameters, "splitmethod", false);
 280                         if (splitmethod != "fasta") {
 281                                 if (temp == "not found")  { splitmethod = "distance"; }
 282                                 else {  splitmethod = temp; }
 283                         }
 284
 285                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 286                         convert(temp, cutoff);
 287                         cutoff += (5 / (precision * 10.0));
 288
 289                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 290                         convert(temp, taxLevelCutoff);
 291
 292                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
 293
 294                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 295                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 296
 297                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 298                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 299
 300                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 301
 302                         showabund = validParameter.validFile(parameters, "showabund", false);
 303                         if (showabund == "not found") { showabund = "T"; }
 304
 305                         timing = validParameter.validFile(parameters, "timing", false);
 306                         if (timing == "not found") { timing = "F"; }
 307
 308                 }
 309         }
 310         catch(exception& e) {
 311                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 312                 exit(1);
 313         }
 314 }
 315
 316 //**********************************************************************************************************************
 317
 318 int ClusterSplitCommand::execute(){
 319         try {
 320
 321                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 322
 323                 time_t estart;
 324                 vector<string> listFileNames;
 325                 set<string> labels;
 326                 string singletonName = "";
 327                 double saveCutoff = cutoff;
 328
 329                 //****************** file prep work ******************************//
 330                 #ifdef USE_MPI
 331                         int pid;
 332                         int tag = 2001;
 333                         MPI_Status status;
 334                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 335                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 336
 337                         if (pid == 0) { //only process 0 converts and splits
 338
 339                 #endif
 340
 341                 //if user gave a phylip file convert to column file
 342                 if (format == "phylip") {
 343                         estart = time(NULL);
 344                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 345
 346                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 347
 348                         NameAssignment* nameMap = NULL;
 349                         convert->setFormat("phylip");
 350                         convert->read(nameMap);
 351
 352                         if (m->control_pressed) {  delete convert;  return 0;  }
 353
 354                         distfile = convert->getOutputFile();
 355
 356                         //if no names file given with phylip file, create it
 357                         ListVector* listToMakeNameFile =  convert->getListVector();
 358                         if (namefile == "") {  //you need to make a namefile for split matrix
 359                                 ofstream out;
 360                                 namefile = phylipfile + ".names";
 361                                 m->openOutputFile(namefile, out);
 362                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 363                                         string bin = listToMakeNameFile->get(i);
 364                                         out << bin << '\t' << bin << endl;
 365                                 }
 366                                 out.close();
 367                         }
 368                         delete listToMakeNameFile;
 369                         delete convert;
 370
 371                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 372                 }
 373                 if (m->control_pressed) { return 0; }
 374
 375                 estart = time(NULL);
 376                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 377
 378                 //split matrix into non-overlapping groups
 379                 SplitMatrix* split;
 380                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 381                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 382                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 383                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 384
 385                 split->split();
 386
 387                 if (m->control_pressed) { delete split; return 0; }
 388
 389                 singletonName = split->getSingletonNames();
 390                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 391                 delete split;
 392
 393                 //output a merged distance file
 394                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 395
 396
 397                 if (m->control_pressed) { return 0; }
 398
 399                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 400                 estart = time(NULL);
 401
 402                 //****************** break up files between processes and cluster each file set ******************************//
 403         #ifdef USE_MPI
 404                         ////you are process 0 from above////
 405
 406                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 407                         dividedNames.resize(processors);
 408
 409                         //for each file group figure out which process will complete it
 410                         //want to divide the load intelligently so the big files are spread between processes
 411                         for (int i = 0; i < distName.size(); i++) {
 412                                 int processToAssign = (i+1) % processors;
 413                                 if (processToAssign == 0) { processToAssign = processors; }
 414
 415                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 416                         }
 417
 418                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 419                         for (int i = 0; i < processors; i++) {
 420                                 int remainder = ((i+1) % processors);
 421                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 422                         }
 423
 424
 425                         //send each child the list of files it needs to process
 426                         for(int i = 1; i < processors; i++) {
 427                                 //send number of file pairs
 428                                 int num = dividedNames[i].size();
 429                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 430
 431                                 for (int j = 0; j < num; j++) { //send filenames to process i
 432                                         char tempDistFileName[1024];
 433                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 434                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 435
 436                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 437                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 438
 439                                         char tempNameFileName[1024];
 440                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 441                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 442
 443                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 444                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 445                                 }
 446                         }
 447
 448                         //process your share
 449                         listFileNames = cluster(dividedNames[0], labels);
 450
 451                         //receive the other processes info
 452                         for(int i = 1; i < processors; i++) {
 453                                 int num = dividedNames[i].size();
 454
 455                                 double tempCutoff;
 456                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 457                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 458
 459                                 //send list filenames to root process
 460                                 for (int j = 0; j < num; j++) {
 461                                         int lengthList = 0;
 462                                         char tempListFileName[1024];
 463
 464                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 465                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 466
 467                                         string myListFileName = tempListFileName;
 468                                         myListFileName = myListFileName.substr(0, lengthList);
 469
 470                                         listFileNames.push_back(myListFileName);
 471                                 }
 472
 473                                 //send Labels to root process
 474                                 int numLabels = 0;
 475                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 476
 477                                 for (int j = 0; j < numLabels; j++) {
 478                                         int lengthLabel = 0;
 479                                         char tempLabel[100];
 480
 481                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 482                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 483
 484                                         string myLabel = tempLabel;
 485                                         myLabel = myLabel.substr(0, lengthLabel);
 486
 487                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 488                                 }
 489                         }
 490
 491                 }else { //you are a child process
 492                         vector < map<string, string> >  myNames;
 493
 494                         //recieve the files you need to process
 495                         //receive number of file pairs
 496                         int num = 0;
 497                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 498
 499                         myNames.resize(num);
 500
 501                         for (int j = 0; j < num; j++) { //receive filenames to process
 502                                 int lengthDist = 0;
 503                                 char tempDistFileName[1024];
 504
 505                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 506                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 507
 508                                 string myDistFileName = tempDistFileName;
 509                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 510
 511                                 int lengthName = 0;
 512                                 char tempNameFileName[1024];
 513
 514                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 515                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 516
 517                                 string myNameFileName = tempNameFileName;
 518                                 myNameFileName = myNameFileName.substr(0, lengthName);
 519
 520                                 //save file name
 521                                 myNames[j][myDistFileName] = myNameFileName;
 522                         }
 523
 524                         //process them
 525                         listFileNames = cluster(myNames, labels);
 526
 527                         //send cutoff
 528                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 529
 530                         //send list filenames to root process
 531                         for (int j = 0; j < num; j++) {
 532                                 char tempListFileName[1024];
 533                                 strcpy(tempListFileName, listFileNames[j].c_str());
 534                                 int lengthList = listFileNames[j].length();
 535
 536                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 537                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 538                         }
 539
 540                         //send Labels to root process
 541                         int numLabels = labels.size();
 542                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 543
 544                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 545                                 char tempLabel[100];
 546                                 strcpy(tempLabel, (*it).c_str());
 547                                 int lengthLabel = (*it).length();
 548
 549                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 550                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 551                         }
 552                 }
 553
 554                 //make everyone wait
 555                 MPI_Barrier(MPI_COMM_WORLD);
 556
 557         #else
 558
 559                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 560                                 if(processors == 1){
 561                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 562                                 }else{
 563                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 564                                         dividedNames.resize(processors);
 565
 566                                         //for each file group figure out which process will complete it
 567                                         //want to divide the load intelligently so the big files are spread between processes
 568                                         for (int i = 0; i < distName.size(); i++) {
 569                                                 int processToAssign = (i+1) % processors;
 570                                                 if (processToAssign == 0) { processToAssign = processors; }
 571
 572                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 573                                         }
 574
 575                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 576                                         for (int i = 0; i < processors; i++) {
 577                                                 int remainder = ((i+1) % processors);
 578                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 579                                         }
 580
 581                                         createProcesses(dividedNames);
 582
 583                                         if (m->control_pressed) { return 0; }
 584
 585                                         //get list of list file names from each process
 586                                         for(int i=0;i<processors;i++){
 587                                                 string filename = toString(processIDS[i]) + ".temp";
 588                                                 ifstream in;
 589                                                 m->openInputFile(filename, in);
 590
 591                                                 in >> tag; m->gobble(in);
 592
 593                                                 while(!in.eof()) {
 594                                                         string tempName;
 595                                                         in >> tempName; m->gobble(in);
 596                                                         listFileNames.push_back(tempName);
 597                                                 }
 598                                                 in.close();
 599                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 600
 601                                                 //get labels
 602                                                 filename = toString(processIDS[i]) + ".temp.labels";
 603                                                 ifstream in2;
 604                                                 m->openInputFile(filename, in2);
 605
 606                                                 float tempCutoff;
 607                                                 in2 >> tempCutoff; m->gobble(in2);
 608                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 609
 610                                                 while(!in2.eof()) {
 611                                                         string tempName;
 612                                                         in2 >> tempName; m->gobble(in2);
 613                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 614                                                 }
 615                                                 in2.close();
 616                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 617                                         }
 618                                 }
 619                 #else
 620                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 621                 #endif
 622         #endif
 623                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 624
 625                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 626
 627                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 628
 629                 //****************** merge list file and create rabund and sabund files ******************************//
 630                 estart = time(NULL);
 631                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 632
 633                 #ifdef USE_MPI
 634                         if (pid == 0) { //only process 0 merges
 635                 #endif
 636
 637                 ListVector* listSingle;
 638                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 639
 640                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 641
 642                 mergeLists(listFileNames, labelBins, listSingle);
 643
 644                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 645
 646                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 647
 648                 //set list file as new current listfile
 649                 string current = "";
 650                 itTypes = outputTypes.find("list");
 651                 if (itTypes != outputTypes.end()) {
 652                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
 653                 }
 654
 655                 //set rabund file as new current rabundfile
 656                 itTypes = outputTypes.find("rabund");
 657                 if (itTypes != outputTypes.end()) {
 658                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
 659                 }
 660
 661                 //set sabund file as new current sabundfile
 662                 itTypes = outputTypes.find("sabund");
 663                 if (itTypes != outputTypes.end()) {
 664                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
 665                 }
 666
 667                 m->mothurOutEndLine();
 668                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 669                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 670                 m->mothurOutEndLine();
 671
 672                 #ifdef USE_MPI
 673                         } //only process 0 merges
 674
 675                         //make everyone wait
 676                         MPI_Barrier(MPI_COMM_WORLD);
 677                 #endif
 678
 679                 return 0;
 680         }
 681         catch(exception& e) {
 682                 m->errorOut(e, "ClusterSplitCommand", "execute");
 683                 exit(1);
 684         }
 685 }
 686 //**********************************************************************************************************************
 687 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 688         try {
 689
 690                 map<float, int> labelBin;
 691                 vector<float> orderFloat;
 692                 int numSingleBins;
 693
 694                 //read in singletons
 695                 if (singleton != "none") {
 696                         ifstream in;
 697                         m->openInputFile(singleton, in);
 698
 699                         string firstCol, secondCol;
 700                         listSingle = new ListVector();
 701                         while (!in.eof()) {
 702                                 in >> firstCol >> secondCol; m->gobble(in);
 703                                 listSingle->push_back(secondCol);
 704                         }
 705                         in.close();
 706                         remove(singleton.c_str());
 707
 708                         numSingleBins = listSingle->getNumBins();
 709                 }else{  listSingle = NULL; numSingleBins = 0;  }
 710
 711                 //go through users set and make them floats so we can sort them
 712                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 713                         float temp = -10.0;
 714
 715                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 716                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 717
 718                         if (temp <= cutoff) {
 719                                 orderFloat.push_back(temp);
 720                                 labelBin[temp] = numSingleBins; //initialize numbins
 721                         }
 722                 }
 723
 724                 //sort order
 725                 sort(orderFloat.begin(), orderFloat.end());
 726                 userLabels.clear();
 727
 728                 //get the list info from each file
 729                 for (int k = 0; k < listNames.size(); k++) {
 730
 731                         if (m->control_pressed) {
 732                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 733                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 734                                 return labelBin;
 735                         }
 736
 737                         InputData* input = new InputData(listNames[k], "list");
 738                         ListVector* list = input->getListVector();
 739                         string lastLabel = list->getLabel();
 740
 741                         string filledInList = listNames[k] + "filledInTemp";
 742                         ofstream outFilled;
 743                         m->openOutputFile(filledInList, outFilled);
 744
 745                         //for each label needed
 746                         for(int l = 0; l < orderFloat.size(); l++){
 747
 748                                 string thisLabel;
 749                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 750                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 751
 752                                 //this file has reached the end
 753                                 if (list == NULL) {
 754                                         list = input->getListVector(lastLabel, true);
 755                                 }else{  //do you have the distance, or do you need to fill in
 756
 757                                         float labelFloat;
 758                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 759                                         else { convert(list->getLabel(), labelFloat); }
 760
 761                                         //check for missing labels
 762                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 763                                                 //if its bigger get last label, otherwise keep it
 764                                                 delete list;
 765                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 766                                         }
 767                                         lastLabel = list->getLabel();
 768                                 }
 769
 770                                 //print to new file
 771                                 list->setLabel(thisLabel);
 772                                 list->print(outFilled);
 773
 774                                 //update labelBin
 775                                 labelBin[orderFloat[l]] += list->getNumBins();
 776
 777                                 delete list;
 778
 779                                 list = input->getListVector();
 780                         }
 781
 782                         if (list != NULL) { delete list; }
 783                         delete input;
 784
 785                         outFilled.close();
 786                         remove(listNames[k].c_str());
 787                         rename(filledInList.c_str(), listNames[k].c_str());
 788                 }
 789
 790                 return labelBin;
 791         }
 792         catch(exception& e) {
 793                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 794                 exit(1);
 795         }
 796 }
 797 //**********************************************************************************************************************
 798 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 799         try {
 800                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 801                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 802
 803                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 804                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 805                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 806
 807                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 808                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 809                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 810
 811                 map<float, int>::iterator itLabel;
 812
 813                 //for each label needed
 814                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 815
 816                         string thisLabel;
 817                         if (itLabel->first == -1) { thisLabel = "unique"; }
 818                         else { thisLabel = toString(itLabel->first,  length-1);  }
 819
 820                         outList << thisLabel << '\t' << itLabel->second << '\t';
 821
 822                         RAbundVector* rabund = new RAbundVector();
 823                         rabund->setLabel(thisLabel);
 824
 825                         //add in singletons
 826                         if (listSingle != NULL) {
 827                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 828                                         outList << listSingle->get(j) << '\t';
 829                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 830                                 }
 831                         }
 832
 833                         //get the list info from each file
 834                         for (int k = 0; k < listNames.size(); k++) {
 835
 836                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 837
 838                                 InputData* input = new InputData(listNames[k], "list");
 839                                 ListVector* list = input->getListVector(thisLabel);
 840
 841                                 //this file has reached the end
 842                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 843                                 else {
 844                                         for (int j = 0; j < list->getNumBins(); j++) {
 845                                                 outList << list->get(j) << '\t';
 846                                                 rabund->push_back(m->getNumNames(list->get(j)));
 847                                         }
 848                                         delete list;
 849                                 }
 850                                 delete input;
 851                         }
 852
 853                         SAbundVector sabund = rabund->getSAbundVector();
 854
 855                         sabund.print(outSabund);
 856                         rabund->print(outRabund);
 857                         outList << endl;
 858
 859                         delete rabund;
 860                 }
 861
 862                 outList.close();
 863                 outRabund.close();
 864                 outSabund.close();
 865
 866                 if (listSingle != NULL) { delete listSingle;  }
 867
 868                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 869
 870                 return 0;
 871         }
 872         catch(exception& e) {
 873                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 874                 exit(1);
 875         }
 876 }
 877
 878 //**********************************************************************************************************************
 879
 880 void ClusterSplitCommand::printData(ListVector* oldList){
 881         try {
 882                 string label = oldList->getLabel();
 883                 RAbundVector oldRAbund = oldList->getRAbundVector();
 884
 885                 oldRAbund.setLabel(label);
 886                 if (m->isTrue(showabund)) {
 887                         oldRAbund.getSAbundVector().print(cout);
 888                 }
 889                 oldRAbund.print(outRabund);
 890                 oldRAbund.getSAbundVector().print(outSabund);
 891
 892                 oldList->print(outList);
 893         }
 894         catch(exception& e) {
 895                 m->errorOut(e, "ClusterSplitCommand", "printData");
 896                 exit(1);
 897         }
 898 }
 899 //**********************************************************************************************************************
 900 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 901         try {
 902
 903         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 904                 int process = 0;
 905                 int exitCommand = 1;
 906                 processIDS.clear();
 907
 908                 //loop through and create all the processes you want
 909                 while (process != processors) {
 910                         int pid = fork();
 911
 912                         if (pid > 0) {
 913                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 914                                 process++;
 915                         }else if (pid == 0){
 916                                 set<string> labels;
 917                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 918
 919                                 //write out names to file
 920                                 string filename = toString(getpid()) + ".temp";
 921                                 ofstream out;
 922                                 m->openOutputFile(filename, out);
 923                                 out << tag << endl;
 924                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 925                                 out.close();
 926
 927                                 //print out labels
 928                                 ofstream outLabels;
 929                                 filename = toString(getpid()) + ".temp.labels";
 930                                 m->openOutputFile(filename, outLabels);
 931
 932                                 outLabels << cutoff << endl;
 933                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 934                                         outLabels << (*it) << endl;
 935                                 }
 936                                 outLabels.close();
 937
 938                                 exit(0);
 939                         }else {
 940                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 941                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 942                                 exit(0);
 943                         }
 944                 }
 945
 946                 //force parent to wait until all the processes are done
 947                 for (int i=0;i<processors;i++) {
 948                         int temp = processIDS[i];
 949                         wait(&temp);
 950                 }
 951
 952                 return exitCommand;
 953         #endif
 954
 955         }
 956         catch(exception& e) {
 957                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 958                 exit(1);
 959         }
 960 }
 961 //**********************************************************************************************************************
 962
 963 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 964         try {
 965                 Cluster* cluster;
 966                 SparseMatrix* matrix;
 967                 ListVector* list;
 968                 ListVector oldList;
 969                 RAbundVector* rabund;
 970
 971                 vector<string> listFileNames;
 972
 973                 double smallestCutoff = cutoff;
 974
 975                 //cluster each distance file
 976                 for (int i = 0; i < distNames.size(); i++) {
 977                         if (m->control_pressed) { return listFileNames; }
 978
 979                         string thisNamefile = distNames[i].begin()->second;
 980                         string thisDistFile = distNames[i].begin()->first;
 981
 982                         #ifdef USE_MPI
 983                                 int pid;
 984                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 985
 986                                 //output your files too
 987                                 if (pid != 0) {
 988                                         cout << endl << "Reading " << thisDistFile << endl;
 989                                 }
 990                         #endif
 991
 992                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 993
 994                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 995                         read->setCutoff(cutoff);
 996
 997                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 998                         nameMap->readMap();
 999                         read->read(nameMap);
1000
1001                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
1002
1003                         list = read->getListVector();
1004                         oldList = *list;
1005                         matrix = read->getMatrix();
1006
1007                         delete read;
1008                         delete nameMap;
1009
1010
1011                         #ifdef USE_MPI
1012                                 //output your files too
1013                                 if (pid != 0) {
1014                                         cout << endl << "Clustering " << thisDistFile << endl;
1015                                 }
1016                         #endif
1017
1018                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1019
1020                         rabund = new RAbundVector(list->getRAbundVector());
1021
1022                         //create cluster
1023                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1024                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1025                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1026                         tag = cluster->getTag();
1027
1028                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1029                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1030
1031                         ofstream listFile;
1032                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1033
1034                         listFileNames.push_back(fileroot+ tag + ".list");
1035
1036                         float previousDist = 0.00000;
1037                         float rndPreviousDist = 0.00000;
1038
1039                         oldList = *list;
1040
1041                         print_start = true;
1042                         start = time(NULL);
1043                         double saveCutoff = cutoff;
1044
1045                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1046
1047                                 if (m->control_pressed) { //clean up
1048                                         delete matrix; delete list;     delete cluster; delete rabund;
1049                                         listFile.close();
1050                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1051                                         listFileNames.clear(); return listFileNames;
1052                                 }
1053
1054                                 cluster->update(saveCutoff);
1055
1056                                 float dist = matrix->getSmallDist();
1057                                 float rndDist;
1058                                 if (hard) {
1059                                         rndDist = m->ceilDist(dist, precision);
1060                                 }else{
1061                                         rndDist = m->roundDist(dist, precision);
1062                                 }
1063
1064                                 if(previousDist <= 0.0000 && dist != previousDist){
1065                                         oldList.setLabel("unique");
1066                                         oldList.print(listFile);
1067                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1068                                 }
1069                                 else if(rndDist != rndPreviousDist){
1070                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1071                                         oldList.print(listFile);
1072                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1073                                 }
1074
1075                                 previousDist = dist;
1076                                 rndPreviousDist = rndDist;
1077                                 oldList = *list;
1078                         }
1079
1080
1081                         if(previousDist <= 0.0000){
1082                                 oldList.setLabel("unique");
1083                                 oldList.print(listFile);
1084                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1085                         }
1086                         else if(rndPreviousDist<cutoff){
1087                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1088                                 oldList.print(listFile);
1089                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1090                         }
1091
1092                         delete matrix; delete list;     delete cluster; delete rabund;
1093                         listFile.close();
1094
1095                         if (m->control_pressed) { //clean up
1096                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1097                                 listFileNames.clear(); return listFileNames;
1098                         }
1099
1100                         remove(thisDistFile.c_str());
1101                         remove(thisNamefile.c_str());
1102
1103                         if (saveCutoff != cutoff) {
1104                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1105                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1106
1107                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1108                         }
1109
1110                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1111                 }
1112
1113                 cutoff = smallestCutoff;
1114
1115                 return listFileNames;
1116
1117         }
1118         catch(exception& e) {
1119                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1120                 exit(1);
1121         }
1122
1123
1124 }
1125 //**********************************************************************************************************************
1126
1127 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1128         try{
1129
1130 #ifdef USE_MPI
1131                 int pid;
1132                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1133
1134                 if (pid != 0) {
1135 #endif
1136
1137                 string thisOutputDir = outputDir;
1138                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1139                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1140                 remove(outputFileName.c_str());
1141
1142
1143                 for (int i = 0; i < distNames.size(); i++) {
1144                         if (m->control_pressed) {  return 0; }
1145
1146                         string thisDistFile = distNames[i].begin()->first;
1147
1148                         m->appendFiles(thisDistFile, outputFileName);
1149                 }
1150
1151                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1152
1153 #ifdef USE_MPI
1154                 }
1155 #endif
1156
1157                 return 0;
1158
1159
1160         }
1161         catch(exception& e) {
1162                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1163                 exit(1);
1164         }
1165 }
1166 //**********************************************************************************************************************