clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::getValidParameters(){
  21         try {
  22                 string AlignArray[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  23                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
  24                 return myArray;
  25         }
  26         catch(exception& e) {
  27                 m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
  28                 exit(1);
  29         }
  30 }
  31 //**********************************************************************************************************************
  32 ClusterSplitCommand::ClusterSplitCommand(){
  33         try {
  34                 abort = true;
  35                 //initialize outputTypes
  36                 vector<string> tempOutNames;
  37                 outputTypes["list"] = tempOutNames;
  38                 outputTypes["rabund"] = tempOutNames;
  39                 outputTypes["sabund"] = tempOutNames;
  40                 outputTypes["column"] = tempOutNames;
  41         }
  42         catch(exception& e) {
  43                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  44                 exit(1);
  45         }
  46 }
  47 //**********************************************************************************************************************
  48 vector<string> ClusterSplitCommand::getRequiredParameters(){
  49         try {
  50                 string Array[] =  {"fasta","phylip","column","or"};
  51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  52                 return myArray;
  53         }
  54         catch(exception& e) {
  55                 m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
  56                 exit(1);
  57         }
  58 }
  59 //**********************************************************************************************************************
  60 vector<string> ClusterSplitCommand::getRequiredFiles(){
  61         try {
  62                 vector<string> myArray;
  63                 return myArray;
  64         }
  65         catch(exception& e) {
  66                 m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
  67                 exit(1);
  68         }
  69 }
  70 //**********************************************************************************************************************
  71 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
  72 ClusterSplitCommand::ClusterSplitCommand(string option)  {
  73         try{
  74                 globaldata = GlobalData::getInstance();
  75                 abort = false;
  76                 format = "";
  77
  78                 //allow user to run help
  79                 if(option == "help") { help(); abort = true; }
  80
  81                 else {
  82                         //valid paramters for this command
  83                         string Array[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  84                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  85
  86                         OptionParser parser(option);
  87                         map<string,string> parameters = parser.getParameters();
  88
  89                         ValidParameters validParameter("cluster.split");
  90
  91                         //check to make sure all parameters are valid for command
  92                         map<string,string>::iterator it;
  93                         for (it = parameters.begin(); it != parameters.end(); it++) {
  94                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
  95                                         abort = true;
  96                                 }
  97                         }
  98
  99                         //initialize outputTypes
 100                         vector<string> tempOutNames;
 101                         outputTypes["list"] = tempOutNames;
 102                         outputTypes["rabund"] = tempOutNames;
 103                         outputTypes["sabund"] = tempOutNames;
 104                         outputTypes["column"] = tempOutNames;
 105
 106                         globaldata->newRead();
 107
 108                         //if the user changes the output directory command factory will send this info to us in the output parameter
 109                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 110
 111                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 113                         if (inputDir == "not found"){   inputDir = "";          }
 114                         else {
 115                                 string path;
 116                                 it = parameters.find("phylip");
 117                                 //user has given a template file
 118                                 if(it != parameters.end()){
 119                                         path = m->hasPath(it->second);
 120                                         //if the user has not given a path then, add inputdir. else leave path alone.
 121                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 122                                 }
 123
 124                                 it = parameters.find("column");
 125                                 //user has given a template file
 126                                 if(it != parameters.end()){
 127                                         path = m->hasPath(it->second);
 128                                         //if the user has not given a path then, add inputdir. else leave path alone.
 129                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 130                                 }
 131
 132                                 it = parameters.find("name");
 133                                 //user has given a template file
 134                                 if(it != parameters.end()){
 135                                         path = m->hasPath(it->second);
 136                                         //if the user has not given a path then, add inputdir. else leave path alone.
 137                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 138                                 }
 139
 140                                 it = parameters.find("taxonomy");
 141                                 //user has given a template file
 142                                 if(it != parameters.end()){
 143                                         path = m->hasPath(it->second);
 144                                         //if the user has not given a path then, add inputdir. else leave path alone.
 145                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 146                                 }
 147
 148                                 it = parameters.find("fasta");
 149                                 //user has given a template file
 150                                 if(it != parameters.end()){
 151                                         path = m->hasPath(it->second);
 152                                         //if the user has not given a path then, add inputdir. else leave path alone.
 153                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 154                                 }
 155                         }
 156
 157                         //check for required parameters
 158                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 159                         if (phylipfile == "not open") { abort = true; }
 160                         else if (phylipfile == "not found") { phylipfile = ""; }
 161                         else {  distfile = phylipfile;  format = "phylip";      }
 162
 163                         columnfile = validParameter.validFile(parameters, "column", true);
 164                         if (columnfile == "not open") { abort = true; }
 165                         else if (columnfile == "not found") { columnfile = ""; }
 166                         else {  distfile = columnfile; format = "column";       }
 167
 168                         namefile = validParameter.validFile(parameters, "name", true);
 169                         if (namefile == "not open") { abort = true; }
 170                         else if (namefile == "not found") { namefile = ""; }
 171
 172                         fastafile = validParameter.validFile(parameters, "fasta", true);
 173                         if (fastafile == "not open") { abort = true; }
 174                         else if (fastafile == "not found") { fastafile = ""; }
 175                         else { distfile = fastafile;  splitmethod = "fasta";  }
 176
 177                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 178                         if (taxFile == "not open") { abort = true; }
 179                         else if (taxFile == "not found") { taxFile = ""; }
 180
 181                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); abort = true; }
 182                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 183
 184                         if (columnfile != "") {
 185                                 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
 186                         }
 187
 188                         if (fastafile != "") {
 189                                 if (taxFile == "") { m->mothurOut("You need to provide a taxonomy file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 190                                 if (namefile == "") { m->mothurOut("You need to provide a names file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 191                         }
 192
 193                         //check for optional parameter and set defaults
 194                         // ...at some point should added some additional type checking...
 195                         //get user cutoff and precision or use defaults
 196                         string temp;
 197                         temp = validParameter.validFile(parameters, "precision", false);
 198                         if (temp == "not found") { temp = "100"; }
 199                         //saves precision legnth for formatting below
 200                         length = temp.length();
 201                         convert(temp, precision);
 202
 203                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
 204                         hard = m->isTrue(temp);
 205
 206                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 207                         large = m->isTrue(temp);
 208
 209                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
 210                         convert(temp, processors);
 211
 212                         temp = validParameter.validFile(parameters, "splitmethod", false);
 213                         if (splitmethod != "fasta") {
 214                                 if (temp == "not found")  { splitmethod = "distance"; }
 215                                 else {  splitmethod = temp; }
 216                         }
 217
 218                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 219                         convert(temp, cutoff);
 220                         cutoff += (5 / (precision * 10.0));
 221
 222                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 223                         convert(temp, taxLevelCutoff);
 224
 225                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
 226
 227                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 228                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 229
 230                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 231                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 232
 233                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 234
 235                         showabund = validParameter.validFile(parameters, "showabund", false);
 236                         if (showabund == "not found") { showabund = "T"; }
 237
 238                         timing = validParameter.validFile(parameters, "timing", false);
 239                         if (timing == "not found") { timing = "F"; }
 240
 241                 }
 242         }
 243         catch(exception& e) {
 244                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 245                 exit(1);
 246         }
 247 }
 248
 249 //**********************************************************************************************************************
 250
 251 void ClusterSplitCommand::help(){
 252         try {
 253                 m->mothurOut("The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n");
 254                 m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n");
 255                 m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n");
 256                 m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n");
 257                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n");
 258                 m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n");
 259                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n");
 260                 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
 261                 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file. \n");
 262                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
 263                 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
 264                 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
 265                 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
 266                 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
 267                 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
 268                 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
 269                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
 270                 #ifdef USE_MPI
 271                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
 272                 #endif
 273                 m->mothurOut("The cluster.split command should be in the following format: \n");
 274                 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
 275                 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");
 276
 277         }
 278         catch(exception& e) {
 279                 m->errorOut(e, "ClusterSplitCommand", "help");
 280                 exit(1);
 281         }
 282 }
 283
 284 //**********************************************************************************************************************
 285
 286 ClusterSplitCommand::~ClusterSplitCommand(){}
 287
 288 //**********************************************************************************************************************
 289
 290 int ClusterSplitCommand::execute(){
 291         try {
 292
 293                 if (abort == true) {    return 0;       }
 294
 295                 time_t estart;
 296                 vector<string> listFileNames;
 297                 set<string> labels;
 298                 string singletonName = "";
 299                 double saveCutoff = cutoff;
 300
 301                 //****************** file prep work ******************************//
 302                 #ifdef USE_MPI
 303                         int pid;
 304                         int tag = 2001;
 305                         MPI_Status status;
 306                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 307                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 308
 309                         if (pid == 0) { //only process 0 converts and splits
 310
 311                 #endif
 312
 313                 //if user gave a phylip file convert to column file
 314                 if (format == "phylip") {
 315                         estart = time(NULL);
 316                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 317
 318                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 319
 320                         NameAssignment* nameMap = NULL;
 321                         convert->setFormat("phylip");
 322                         convert->read(nameMap);
 323
 324                         if (m->control_pressed) {  delete convert;  return 0;  }
 325
 326                         distfile = convert->getOutputFile();
 327
 328                         //if no names file given with phylip file, create it
 329                         ListVector* listToMakeNameFile =  convert->getListVector();
 330                         if (namefile == "") {  //you need to make a namefile for split matrix
 331                                 ofstream out;
 332                                 namefile = phylipfile + ".names";
 333                                 m->openOutputFile(namefile, out);
 334                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 335                                         string bin = listToMakeNameFile->get(i);
 336                                         out << bin << '\t' << bin << endl;
 337                                 }
 338                                 out.close();
 339                         }
 340                         delete listToMakeNameFile;
 341                         delete convert;
 342
 343                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 344                 }
 345                 if (m->control_pressed) { return 0; }
 346
 347                 estart = time(NULL);
 348                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 349
 350                 //split matrix into non-overlapping groups
 351                 SplitMatrix* split;
 352                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 353                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 354                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 355                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 356
 357                 split->split();
 358
 359                 if (m->control_pressed) { delete split; return 0; }
 360
 361                 singletonName = split->getSingletonNames();
 362                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 363                 delete split;
 364
 365                 //output a merged distance file
 366                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 367
 368
 369                 if (m->control_pressed) { return 0; }
 370
 371                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 372                 estart = time(NULL);
 373
 374                 //****************** break up files between processes and cluster each file set ******************************//
 375         #ifdef USE_MPI
 376                         ////you are process 0 from above////
 377
 378                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 379                         dividedNames.resize(processors);
 380
 381                         //for each file group figure out which process will complete it
 382                         //want to divide the load intelligently so the big files are spread between processes
 383                         for (int i = 0; i < distName.size(); i++) {
 384                                 int processToAssign = (i+1) % processors;
 385                                 if (processToAssign == 0) { processToAssign = processors; }
 386
 387                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 388                         }
 389
 390                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 391                         for (int i = 0; i < processors; i++) {
 392                                 int remainder = ((i+1) % processors);
 393                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 394                         }
 395
 396
 397                         //send each child the list of files it needs to process
 398                         for(int i = 1; i < processors; i++) {
 399                                 //send number of file pairs
 400                                 int num = dividedNames[i].size();
 401                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 402
 403                                 for (int j = 0; j < num; j++) { //send filenames to process i
 404                                         char tempDistFileName[1024];
 405                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 406                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 407
 408                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 409                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 410
 411                                         char tempNameFileName[1024];
 412                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 413                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 414
 415                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 416                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 417                                 }
 418                         }
 419
 420                         //process your share
 421                         listFileNames = cluster(dividedNames[0], labels);
 422
 423                         //receive the other processes info
 424                         for(int i = 1; i < processors; i++) {
 425                                 int num = dividedNames[i].size();
 426
 427                                 double tempCutoff;
 428                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 429                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 430
 431                                 //send list filenames to root process
 432                                 for (int j = 0; j < num; j++) {
 433                                         int lengthList = 0;
 434                                         char tempListFileName[1024];
 435
 436                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 437                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 438
 439                                         string myListFileName = tempListFileName;
 440                                         myListFileName = myListFileName.substr(0, lengthList);
 441
 442                                         listFileNames.push_back(myListFileName);
 443                                 }
 444
 445                                 //send Labels to root process
 446                                 int numLabels = 0;
 447                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 448
 449                                 for (int j = 0; j < numLabels; j++) {
 450                                         int lengthLabel = 0;
 451                                         char tempLabel[100];
 452
 453                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 454                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 455
 456                                         string myLabel = tempLabel;
 457                                         myLabel = myLabel.substr(0, lengthLabel);
 458
 459                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 460                                 }
 461                         }
 462
 463                 }else { //you are a child process
 464                         vector < map<string, string> >  myNames;
 465
 466                         //recieve the files you need to process
 467                         //receive number of file pairs
 468                         int num = 0;
 469                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 470
 471                         myNames.resize(num);
 472
 473                         for (int j = 0; j < num; j++) { //receive filenames to process
 474                                 int lengthDist = 0;
 475                                 char tempDistFileName[1024];
 476
 477                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 478                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 479
 480                                 string myDistFileName = tempDistFileName;
 481                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 482
 483                                 int lengthName = 0;
 484                                 char tempNameFileName[1024];
 485
 486                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 487                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 488
 489                                 string myNameFileName = tempNameFileName;
 490                                 myNameFileName = myNameFileName.substr(0, lengthName);
 491
 492                                 //save file name
 493                                 myNames[j][myDistFileName] = myNameFileName;
 494                         }
 495
 496                         //process them
 497                         listFileNames = cluster(myNames, labels);
 498
 499                         //send cutoff
 500                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 501
 502                         //send list filenames to root process
 503                         for (int j = 0; j < num; j++) {
 504                                 char tempListFileName[1024];
 505                                 strcpy(tempListFileName, listFileNames[j].c_str());
 506                                 int lengthList = listFileNames[j].length();
 507
 508                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 509                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 510                         }
 511
 512                         //send Labels to root process
 513                         int numLabels = labels.size();
 514                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 515
 516                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 517                                 char tempLabel[100];
 518                                 strcpy(tempLabel, (*it).c_str());
 519                                 int lengthLabel = (*it).length();
 520
 521                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 522                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 523                         }
 524                 }
 525
 526                 //make everyone wait
 527                 MPI_Barrier(MPI_COMM_WORLD);
 528
 529         #else
 530
 531                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 532                                 if(processors == 1){
 533                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 534                                 }else{
 535                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 536                                         dividedNames.resize(processors);
 537
 538                                         //for each file group figure out which process will complete it
 539                                         //want to divide the load intelligently so the big files are spread between processes
 540                                         for (int i = 0; i < distName.size(); i++) {
 541                                                 int processToAssign = (i+1) % processors;
 542                                                 if (processToAssign == 0) { processToAssign = processors; }
 543
 544                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 545                                         }
 546
 547                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 548                                         for (int i = 0; i < processors; i++) {
 549                                                 int remainder = ((i+1) % processors);
 550                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 551                                         }
 552
 553                                         createProcesses(dividedNames);
 554
 555                                         if (m->control_pressed) { return 0; }
 556
 557                                         //get list of list file names from each process
 558                                         for(int i=0;i<processors;i++){
 559                                                 string filename = toString(processIDS[i]) + ".temp";
 560                                                 ifstream in;
 561                                                 m->openInputFile(filename, in);
 562
 563                                                 in >> tag; m->gobble(in);
 564
 565                                                 while(!in.eof()) {
 566                                                         string tempName;
 567                                                         in >> tempName; m->gobble(in);
 568                                                         listFileNames.push_back(tempName);
 569                                                 }
 570                                                 in.close();
 571                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 572
 573                                                 //get labels
 574                                                 filename = toString(processIDS[i]) + ".temp.labels";
 575                                                 ifstream in2;
 576                                                 m->openInputFile(filename, in2);
 577
 578                                                 float tempCutoff;
 579                                                 in2 >> tempCutoff; m->gobble(in2);
 580                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 581
 582                                                 while(!in2.eof()) {
 583                                                         string tempName;
 584                                                         in2 >> tempName; m->gobble(in2);
 585                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 586                                                 }
 587                                                 in2.close();
 588                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 589                                         }
 590                                 }
 591                 #else
 592                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 593                 #endif
 594         #endif
 595                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 596
 597                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 598
 599                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 600
 601                 //****************** merge list file and create rabund and sabund files ******************************//
 602                 estart = time(NULL);
 603                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 604
 605                 #ifdef USE_MPI
 606                         if (pid == 0) { //only process 0 merges
 607                 #endif
 608
 609                 ListVector* listSingle;
 610                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 611
 612                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 613
 614                 mergeLists(listFileNames, labelBins, listSingle);
 615
 616                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 617
 618                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 619
 620                 m->mothurOutEndLine();
 621                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 622                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 623                 m->mothurOutEndLine();
 624
 625                 #ifdef USE_MPI
 626                         } //only process 0 merges
 627
 628                         //make everyone wait
 629                         MPI_Barrier(MPI_COMM_WORLD);
 630                 #endif
 631
 632                 return 0;
 633         }
 634         catch(exception& e) {
 635                 m->errorOut(e, "ClusterSplitCommand", "execute");
 636                 exit(1);
 637         }
 638 }
 639 //**********************************************************************************************************************
 640 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 641         try {
 642
 643                 map<float, int> labelBin;
 644                 vector<float> orderFloat;
 645                 int numSingleBins;
 646
 647                 //read in singletons
 648                 if (singleton != "none") {
 649                         ifstream in;
 650                         m->openInputFile(singleton, in);
 651
 652                         string firstCol, secondCol;
 653                         listSingle = new ListVector();
 654                         while (!in.eof()) {
 655                                 in >> firstCol >> secondCol; m->gobble(in);
 656                                 listSingle->push_back(secondCol);
 657                         }
 658                         in.close();
 659                         remove(singleton.c_str());
 660
 661                         numSingleBins = listSingle->getNumBins();
 662                 }else{  listSingle = NULL; numSingleBins = 0;  }
 663
 664                 //go through users set and make them floats so we can sort them
 665                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 666                         float temp = -10.0;
 667
 668                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 669                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 670
 671                         if (temp <= cutoff) {
 672                                 orderFloat.push_back(temp);
 673                                 labelBin[temp] = numSingleBins; //initialize numbins
 674                         }
 675                 }
 676
 677                 //sort order
 678                 sort(orderFloat.begin(), orderFloat.end());
 679                 userLabels.clear();
 680
 681                 //get the list info from each file
 682                 for (int k = 0; k < listNames.size(); k++) {
 683
 684                         if (m->control_pressed) {
 685                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 686                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 687                                 return labelBin;
 688                         }
 689
 690                         InputData* input = new InputData(listNames[k], "list");
 691                         ListVector* list = input->getListVector();
 692                         string lastLabel = list->getLabel();
 693
 694                         string filledInList = listNames[k] + "filledInTemp";
 695                         ofstream outFilled;
 696                         m->openOutputFile(filledInList, outFilled);
 697
 698                         //for each label needed
 699                         for(int l = 0; l < orderFloat.size(); l++){
 700
 701                                 string thisLabel;
 702                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 703                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 704
 705                                 //this file has reached the end
 706                                 if (list == NULL) {
 707                                         list = input->getListVector(lastLabel, true);
 708                                 }else{  //do you have the distance, or do you need to fill in
 709
 710                                         float labelFloat;
 711                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 712                                         else { convert(list->getLabel(), labelFloat); }
 713
 714                                         //check for missing labels
 715                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 716                                                 //if its bigger get last label, otherwise keep it
 717                                                 delete list;
 718                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 719                                         }
 720                                         lastLabel = list->getLabel();
 721                                 }
 722
 723                                 //print to new file
 724                                 list->setLabel(thisLabel);
 725                                 list->print(outFilled);
 726
 727                                 //update labelBin
 728                                 labelBin[orderFloat[l]] += list->getNumBins();
 729
 730                                 delete list;
 731
 732                                 list = input->getListVector();
 733                         }
 734
 735                         if (list != NULL) { delete list; }
 736                         delete input;
 737
 738                         outFilled.close();
 739                         remove(listNames[k].c_str());
 740                         rename(filledInList.c_str(), listNames[k].c_str());
 741                 }
 742
 743                 return labelBin;
 744         }
 745         catch(exception& e) {
 746                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 747                 exit(1);
 748         }
 749 }
 750 //**********************************************************************************************************************
 751 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 752         try {
 753                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 754                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 755
 756                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 757                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 758                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 759
 760                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 761                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 762                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 763
 764                 map<float, int>::iterator itLabel;
 765
 766                 //for each label needed
 767                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 768
 769                         string thisLabel;
 770                         if (itLabel->first == -1) { thisLabel = "unique"; }
 771                         else { thisLabel = toString(itLabel->first,  length-1);  }
 772
 773                         outList << thisLabel << '\t' << itLabel->second << '\t';
 774
 775                         RAbundVector* rabund = new RAbundVector();
 776                         rabund->setLabel(thisLabel);
 777
 778                         //add in singletons
 779                         if (listSingle != NULL) {
 780                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 781                                         outList << listSingle->get(j) << '\t';
 782                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 783                                 }
 784                         }
 785
 786                         //get the list info from each file
 787                         for (int k = 0; k < listNames.size(); k++) {
 788
 789                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 790
 791                                 InputData* input = new InputData(listNames[k], "list");
 792                                 ListVector* list = input->getListVector(thisLabel);
 793
 794                                 //this file has reached the end
 795                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 796                                 else {
 797                                         for (int j = 0; j < list->getNumBins(); j++) {
 798                                                 outList << list->get(j) << '\t';
 799                                                 rabund->push_back(m->getNumNames(list->get(j)));
 800                                         }
 801                                         delete list;
 802                                 }
 803                                 delete input;
 804                         }
 805
 806                         SAbundVector sabund = rabund->getSAbundVector();
 807
 808                         sabund.print(outSabund);
 809                         rabund->print(outRabund);
 810                         outList << endl;
 811
 812                         delete rabund;
 813                 }
 814
 815                 outList.close();
 816                 outRabund.close();
 817                 outSabund.close();
 818
 819                 if (listSingle != NULL) { delete listSingle;  }
 820
 821                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 822
 823                 return 0;
 824         }
 825         catch(exception& e) {
 826                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 827                 exit(1);
 828         }
 829 }
 830
 831 //**********************************************************************************************************************
 832
 833 void ClusterSplitCommand::printData(ListVector* oldList){
 834         try {
 835                 string label = oldList->getLabel();
 836                 RAbundVector oldRAbund = oldList->getRAbundVector();
 837
 838                 oldRAbund.setLabel(label);
 839                 if (m->isTrue(showabund)) {
 840                         oldRAbund.getSAbundVector().print(cout);
 841                 }
 842                 oldRAbund.print(outRabund);
 843                 oldRAbund.getSAbundVector().print(outSabund);
 844
 845                 oldList->print(outList);
 846         }
 847         catch(exception& e) {
 848                 m->errorOut(e, "ClusterSplitCommand", "printData");
 849                 exit(1);
 850         }
 851 }
 852 //**********************************************************************************************************************
 853 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 854         try {
 855
 856         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 857                 int process = 0;
 858                 int exitCommand = 1;
 859                 processIDS.clear();
 860
 861                 //loop through and create all the processes you want
 862                 while (process != processors) {
 863                         int pid = fork();
 864
 865                         if (pid > 0) {
 866                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 867                                 process++;
 868                         }else if (pid == 0){
 869                                 set<string> labels;
 870                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 871
 872                                 //write out names to file
 873                                 string filename = toString(getpid()) + ".temp";
 874                                 ofstream out;
 875                                 m->openOutputFile(filename, out);
 876                                 out << tag << endl;
 877                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 878                                 out.close();
 879
 880                                 //print out labels
 881                                 ofstream outLabels;
 882                                 filename = toString(getpid()) + ".temp.labels";
 883                                 m->openOutputFile(filename, outLabels);
 884
 885                                 outLabels << cutoff << endl;
 886                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 887                                         outLabels << (*it) << endl;
 888                                 }
 889                                 outLabels.close();
 890
 891                                 exit(0);
 892                         }else {
 893                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 894                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 895                                 exit(0);
 896                         }
 897                 }
 898
 899                 //force parent to wait until all the processes are done
 900                 for (int i=0;i<processors;i++) {
 901                         int temp = processIDS[i];
 902                         wait(&temp);
 903                 }
 904
 905                 return exitCommand;
 906         #endif
 907
 908         }
 909         catch(exception& e) {
 910                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 911                 exit(1);
 912         }
 913 }
 914 //**********************************************************************************************************************
 915
 916 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 917         try {
 918                 Cluster* cluster;
 919                 SparseMatrix* matrix;
 920                 ListVector* list;
 921                 ListVector oldList;
 922                 RAbundVector* rabund;
 923
 924                 vector<string> listFileNames;
 925
 926                 double smallestCutoff = cutoff;
 927
 928                 //cluster each distance file
 929                 for (int i = 0; i < distNames.size(); i++) {
 930                         if (m->control_pressed) { return listFileNames; }
 931
 932                         string thisNamefile = distNames[i].begin()->second;
 933                         string thisDistFile = distNames[i].begin()->first;
 934
 935                         //read in distance file
 936                         globaldata->setNameFile(thisNamefile);
 937                         globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
 938
 939                         #ifdef USE_MPI
 940                                 int pid;
 941                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 942
 943                                 //output your files too
 944                                 if (pid != 0) {
 945                                         cout << endl << "Reading " << thisDistFile << endl;
 946                                 }
 947                         #endif
 948
 949                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 950
 951                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 952                         read->setCutoff(cutoff);
 953
 954                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 955                         nameMap->readMap();
 956                         read->read(nameMap);
 957
 958                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
 959
 960                         list = read->getListVector();
 961                         oldList = *list;
 962                         matrix = read->getMatrix();
 963
 964                         delete read;
 965                         delete nameMap;
 966
 967
 968                         #ifdef USE_MPI
 969                                 //output your files too
 970                                 if (pid != 0) {
 971                                         cout << endl << "Clustering " << thisDistFile << endl;
 972                                 }
 973                         #endif
 974
 975                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
 976
 977                         rabund = new RAbundVector(list->getRAbundVector());
 978
 979                         //create cluster
 980                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
 981                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
 982                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
 983                         tag = cluster->getTag();
 984
 985                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
 986                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
 987
 988                         ofstream listFile;
 989                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
 990
 991                         listFileNames.push_back(fileroot+ tag + ".list");
 992
 993                         float previousDist = 0.00000;
 994                         float rndPreviousDist = 0.00000;
 995
 996                         oldList = *list;
 997
 998                         print_start = true;
 999                         start = time(NULL);
1000                         double saveCutoff = cutoff;
1001
1002                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1003
1004                                 if (m->control_pressed) { //clean up
1005                                         delete matrix; delete list;     delete cluster; delete rabund;
1006                                         listFile.close();
1007                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1008                                         listFileNames.clear(); return listFileNames;
1009                                 }
1010
1011                                 cluster->update(saveCutoff);
1012
1013                                 float dist = matrix->getSmallDist();
1014                                 float rndDist;
1015                                 if (hard) {
1016                                         rndDist = m->ceilDist(dist, precision);
1017                                 }else{
1018                                         rndDist = m->roundDist(dist, precision);
1019                                 }
1020
1021                                 if(previousDist <= 0.0000 && dist != previousDist){
1022                                         oldList.setLabel("unique");
1023                                         oldList.print(listFile);
1024                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1025                                 }
1026                                 else if(rndDist != rndPreviousDist){
1027                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1028                                         oldList.print(listFile);
1029                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1030                                 }
1031
1032                                 previousDist = dist;
1033                                 rndPreviousDist = rndDist;
1034                                 oldList = *list;
1035                         }
1036
1037
1038                         if(previousDist <= 0.0000){
1039                                 oldList.setLabel("unique");
1040                                 oldList.print(listFile);
1041                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1042                         }
1043                         else if(rndPreviousDist<cutoff){
1044                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1045                                 oldList.print(listFile);
1046                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1047                         }
1048
1049                         delete matrix; delete list;     delete cluster; delete rabund;
1050                         listFile.close();
1051
1052                         if (m->control_pressed) { //clean up
1053                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1054                                 listFileNames.clear(); return listFileNames;
1055                         }
1056
1057                         remove(thisDistFile.c_str());
1058                         remove(thisNamefile.c_str());
1059
1060                         if (saveCutoff != cutoff) {
1061                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1062                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1063
1064                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1065                         }
1066
1067                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1068                 }
1069
1070                 cutoff = smallestCutoff;
1071
1072                 return listFileNames;
1073
1074         }
1075         catch(exception& e) {
1076                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1077                 exit(1);
1078         }
1079
1080
1081 }
1082 //**********************************************************************************************************************
1083
1084 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1085         try{
1086
1087 #ifdef USE_MPI
1088                 int pid;
1089                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1090
1091                 if (pid != 0) {
1092 #endif
1093
1094                 string thisOutputDir = outputDir;
1095                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1096                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1097                 remove(outputFileName.c_str());
1098
1099
1100                 for (int i = 0; i < distNames.size(); i++) {
1101                         if (m->control_pressed) {  return 0; }
1102
1103                         string thisDistFile = distNames[i].begin()->first;
1104
1105                         m->appendFiles(thisDistFile, outputFileName);
1106                 }
1107
1108                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1109
1110 #ifdef USE_MPI
1111                 }
1112 #endif
1113
1114                 return 0;
1115
1116
1117         }
1118         catch(exception& e) {
1119                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1120                 exit(1);
1121         }
1122 }
1123 //**********************************************************************************************************************