clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::getValidParameters(){
  21         try {
  22                 string AlignArray[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  23                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
  24                 return myArray;
  25         }
  26         catch(exception& e) {
  27                 m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
  28                 exit(1);
  29         }
  30 }
  31 //**********************************************************************************************************************
  32 ClusterSplitCommand::ClusterSplitCommand(){
  33         try {
  34                 abort = true;
  35                 //initialize outputTypes
  36                 vector<string> tempOutNames;
  37                 outputTypes["list"] = tempOutNames;
  38                 outputTypes["rabund"] = tempOutNames;
  39                 outputTypes["sabund"] = tempOutNames;
  40                 outputTypes["column"] = tempOutNames;
  41         }
  42         catch(exception& e) {
  43                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  44                 exit(1);
  45         }
  46 }
  47 //**********************************************************************************************************************
  48 vector<string> ClusterSplitCommand::getRequiredParameters(){
  49         try {
  50                 string Array[] =  {"fasta","phylip","column","or"};
  51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  52                 return myArray;
  53         }
  54         catch(exception& e) {
  55                 m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
  56                 exit(1);
  57         }
  58 }
  59 //**********************************************************************************************************************
  60 vector<string> ClusterSplitCommand::getRequiredFiles(){
  61         try {
  62                 vector<string> myArray;
  63                 return myArray;
  64         }
  65         catch(exception& e) {
  66                 m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
  67                 exit(1);
  68         }
  69 }
  70 //**********************************************************************************************************************
  71 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
  72 ClusterSplitCommand::ClusterSplitCommand(string option)  {
  73         try{
  74                 globaldata = GlobalData::getInstance();
  75                 abort = false;
  76                 format = "";
  77
  78                 //allow user to run help
  79                 if(option == "help") { help(); abort = true; }
  80
  81                 else {
  82                         //valid paramters for this command
  83                         string Array[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  84                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  85
  86                         OptionParser parser(option);
  87                         map<string,string> parameters = parser.getParameters();
  88
  89                         ValidParameters validParameter("cluster.split");
  90
  91                         //check to make sure all parameters are valid for command
  92                         map<string,string>::iterator it;
  93                         for (it = parameters.begin(); it != parameters.end(); it++) {
  94                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
  95                                         abort = true;
  96                                 }
  97                         }
  98
  99                         //initialize outputTypes
 100                         vector<string> tempOutNames;
 101                         outputTypes["list"] = tempOutNames;
 102                         outputTypes["rabund"] = tempOutNames;
 103                         outputTypes["sabund"] = tempOutNames;
 104                         outputTypes["column"] = tempOutNames;
 105
 106                         globaldata->newRead();
 107
 108                         //if the user changes the output directory command factory will send this info to us in the output parameter
 109                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 110
 111                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 113                         if (inputDir == "not found"){   inputDir = "";          }
 114                         else {
 115                                 string path;
 116                                 it = parameters.find("phylip");
 117                                 //user has given a template file
 118                                 if(it != parameters.end()){
 119                                         path = m->hasPath(it->second);
 120                                         //if the user has not given a path then, add inputdir. else leave path alone.
 121                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 122                                 }
 123
 124                                 it = parameters.find("column");
 125                                 //user has given a template file
 126                                 if(it != parameters.end()){
 127                                         path = m->hasPath(it->second);
 128                                         //if the user has not given a path then, add inputdir. else leave path alone.
 129                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 130                                 }
 131
 132                                 it = parameters.find("name");
 133                                 //user has given a template file
 134                                 if(it != parameters.end()){
 135                                         path = m->hasPath(it->second);
 136                                         //if the user has not given a path then, add inputdir. else leave path alone.
 137                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 138                                 }
 139
 140                                 it = parameters.find("taxonomy");
 141                                 //user has given a template file
 142                                 if(it != parameters.end()){
 143                                         path = m->hasPath(it->second);
 144                                         //if the user has not given a path then, add inputdir. else leave path alone.
 145                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 146                                 }
 147
 148                                 it = parameters.find("fasta");
 149                                 //user has given a template file
 150                                 if(it != parameters.end()){
 151                                         path = m->hasPath(it->second);
 152                                         //if the user has not given a path then, add inputdir. else leave path alone.
 153                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 154                                 }
 155                         }
 156
 157                         //check for required parameters
 158                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 159                         if (phylipfile == "not open") { abort = true; }
 160                         else if (phylipfile == "not found") { phylipfile = ""; }
 161                         else {  distfile = phylipfile;  format = "phylip";      }
 162
 163                         columnfile = validParameter.validFile(parameters, "column", true);
 164                         if (columnfile == "not open") { abort = true; }
 165                         else if (columnfile == "not found") { columnfile = ""; }
 166                         else {  distfile = columnfile; format = "column";       }
 167
 168                         namefile = validParameter.validFile(parameters, "name", true);
 169                         if (namefile == "not open") { abort = true; }
 170                         else if (namefile == "not found") { namefile = ""; }
 171
 172                         fastafile = validParameter.validFile(parameters, "fasta", true);
 173                         if (fastafile == "not open") { abort = true; }
 174                         else if (fastafile == "not found") { fastafile = ""; }
 175                         else { distfile = fastafile;  splitmethod = "fasta";  }
 176
 177                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 178                         if (taxFile == "not open") { abort = true; }
 179                         else if (taxFile == "not found") { taxFile = ""; }
 180
 181                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); abort = true; }
 182                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 183
 184                         if (columnfile != "") {
 185                                 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
 186                         }
 187
 188                         if (fastafile != "") {
 189                                 if (taxFile == "") { m->mothurOut("You need to provide a taxonomy file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 190                                 if (namefile == "") { m->mothurOut("You need to provide a names file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 191                         }
 192
 193                         //check for optional parameter and set defaults
 194                         // ...at some point should added some additional type checking...
 195                         //get user cutoff and precision or use defaults
 196                         string temp;
 197                         temp = validParameter.validFile(parameters, "precision", false);
 198                         if (temp == "not found") { temp = "100"; }
 199                         //saves precision legnth for formatting below
 200                         length = temp.length();
 201                         convert(temp, precision);
 202
 203                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
 204                         hard = m->isTrue(temp);
 205
 206                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 207                         large = m->isTrue(temp);
 208
 209                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
 210                         convert(temp, processors);
 211
 212                         temp = validParameter.validFile(parameters, "splitmethod", false);
 213                         if (splitmethod != "fasta") {
 214                                 if (temp == "not found")  { splitmethod = "distance"; }
 215                                 else {  splitmethod = temp; }
 216                         }
 217
 218                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 219                         convert(temp, cutoff);
 220                         cutoff += (5 / (precision * 10.0));
 221
 222                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 223                         convert(temp, taxLevelCutoff);
 224
 225                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
 226
 227                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 228                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 229
 230                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 231                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 232
 233                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 234
 235                         showabund = validParameter.validFile(parameters, "showabund", false);
 236                         if (showabund == "not found") { showabund = "T"; }
 237
 238                         timing = validParameter.validFile(parameters, "timing", false);
 239                         if (timing == "not found") { timing = "F"; }
 240
 241                 }
 242         }
 243         catch(exception& e) {
 244                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 245                 exit(1);
 246         }
 247 }
 248
 249 //**********************************************************************************************************************
 250
 251 void ClusterSplitCommand::help(){
 252         try {
 253                 m->mothurOut("The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n");
 254                 m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n");
 255                 m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n");
 256                 m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n");
 257                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n");
 258                 m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n");
 259                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n");
 260                 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
 261                 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file. \n");
 262                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
 263                 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
 264                 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
 265                 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
 266                 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
 267                 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
 268                 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
 269                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
 270                 #ifdef USE_MPI
 271                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
 272                 #endif
 273                 m->mothurOut("The cluster.split command should be in the following format: \n");
 274                 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
 275                 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");
 276
 277         }
 278         catch(exception& e) {
 279                 m->errorOut(e, "ClusterSplitCommand", "help");
 280                 exit(1);
 281         }
 282 }
 283
 284 //**********************************************************************************************************************
 285
 286 ClusterSplitCommand::~ClusterSplitCommand(){}
 287
 288 //**********************************************************************************************************************
 289
 290 int ClusterSplitCommand::execute(){
 291         try {
 292
 293                 if (abort == true) {    return 0;       }
 294
 295                 time_t estart;
 296                 vector<string> listFileNames;
 297                 set<string> labels;
 298                 string singletonName = "";
 299                 double saveCutoff = cutoff;
 300
 301                 //****************** file prep work ******************************//
 302                 #ifdef USE_MPI
 303                         int pid;
 304                         int tag = 2001;
 305                         MPI_Status status;
 306                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 307                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 308
 309                         if (pid == 0) { //only process 0 converts and splits
 310
 311                 #endif
 312
 313                 //if user gave a phylip file convert to column file
 314                 if (format == "phylip") {
 315                         estart = time(NULL);
 316                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 317
 318                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 319
 320                         NameAssignment* nameMap = NULL;
 321                         convert->setFormat("phylip");
 322                         convert->read(nameMap);
 323
 324                         if (m->control_pressed) {  delete convert;  return 0;  }
 325
 326                         distfile = convert->getOutputFile();
 327
 328                         //if no names file given with phylip file, create it
 329                         ListVector* listToMakeNameFile =  convert->getListVector();
 330                         if (namefile == "") {  //you need to make a namefile for split matrix
 331                                 ofstream out;
 332                                 namefile = phylipfile + ".names";
 333                                 m->openOutputFile(namefile, out);
 334                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 335                                         string bin = listToMakeNameFile->get(i);
 336                                         out << bin << '\t' << bin << endl;
 337                                 }
 338                                 out.close();
 339                         }
 340                         delete listToMakeNameFile;
 341                         delete convert;
 342
 343                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 344                 }
 345                 if (m->control_pressed) { return 0; }
 346
 347                 estart = time(NULL);
 348                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 349
 350                 //split matrix into non-overlapping groups
 351                 SplitMatrix* split;
 352                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 353                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 354                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 355                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 356
 357                 split->split();
 358
 359                 if (m->control_pressed) { delete split; return 0; }
 360
 361                 singletonName = split->getSingletonNames();
 362                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 363                 delete split;
 364
 365                 //output a merged distance file
 366                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 367
 368
 369                 if (m->control_pressed) { return 0; }
 370
 371                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 372                 estart = time(NULL);
 373
 374                 //****************** break up files between processes and cluster each file set ******************************//
 375         #ifdef USE_MPI
 376                         ////you are process 0 from above////
 377
 378                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 379                         dividedNames.resize(processors);
 380
 381                         //for each file group figure out which process will complete it
 382                         //want to divide the load intelligently so the big files are spread between processes
 383                         int count = 1;
 384                         for (int i = 0; i < distName.size(); i++) {
 385                                 int processToAssign = (i+1) % processors;
 386                                 if (processToAssign == 0) { processToAssign = processors; }
 387
 388                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 389                         }
 390
 391                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 392                         for (int i = 0; i < processors; i++) {
 393                                 int remainder = ((i+1) % processors);
 394                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 395                         }
 396
 397
 398                         //send each child the list of files it needs to process
 399                         for(int i = 1; i < processors; i++) {
 400                                 //send number of file pairs
 401                                 int num = dividedNames[i].size();
 402                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 403
 404                                 for (int j = 0; j < num; j++) { //send filenames to process i
 405                                         char tempDistFileName[1024];
 406                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 407                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 408
 409                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 410                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 411
 412                                         char tempNameFileName[1024];
 413                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 414                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 415
 416                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 417                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 418                                 }
 419                         }
 420
 421                         //process your share
 422                         listFileNames = cluster(dividedNames[0], labels);
 423
 424                         //receive the other processes info
 425                         for(int i = 1; i < processors; i++) {
 426                                 int num = dividedNames[i].size();
 427
 428                                 double tempCutoff;
 429                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 430                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 431
 432                                 //send list filenames to root process
 433                                 for (int j = 0; j < num; j++) {
 434                                         int lengthList = 0;
 435                                         char tempListFileName[1024];
 436
 437                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 438                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 439
 440                                         string myListFileName = tempListFileName;
 441                                         myListFileName = myListFileName.substr(0, lengthList);
 442
 443                                         listFileNames.push_back(myListFileName);
 444                                 }
 445
 446                                 //send Labels to root process
 447                                 int numLabels = 0;
 448                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 449
 450                                 for (int j = 0; j < numLabels; j++) {
 451                                         int lengthLabel = 0;
 452                                         char tempLabel[100];
 453
 454                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 455                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 456
 457                                         string myLabel = tempLabel;
 458                                         myLabel = myLabel.substr(0, lengthLabel);
 459
 460                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 461                                 }
 462                         }
 463
 464                 }else { //you are a child process
 465                         vector < map<string, string> >  myNames;
 466
 467                         //recieve the files you need to process
 468                         //receive number of file pairs
 469                         int num = 0;
 470                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 471
 472                         myNames.resize(num);
 473
 474                         for (int j = 0; j < num; j++) { //receive filenames to process
 475                                 int lengthDist = 0;
 476                                 char tempDistFileName[1024];
 477
 478                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 479                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 480
 481                                 string myDistFileName = tempDistFileName;
 482                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 483
 484                                 int lengthName = 0;
 485                                 char tempNameFileName[1024];
 486
 487                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 488                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 489
 490                                 string myNameFileName = tempNameFileName;
 491                                 myNameFileName = myNameFileName.substr(0, lengthName);
 492
 493                                 //save file name
 494                                 myNames[j][myDistFileName] = myNameFileName;
 495                         }
 496
 497                         //process them
 498                         listFileNames = cluster(myNames, labels);
 499
 500                         //send cutoff
 501                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 502
 503                         //send list filenames to root process
 504                         for (int j = 0; j < num; j++) {
 505                                 char tempListFileName[1024];
 506                                 strcpy(tempListFileName, listFileNames[j].c_str());
 507                                 int lengthList = listFileNames[j].length();
 508
 509                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 510                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 511                         }
 512
 513                         //send Labels to root process
 514                         int numLabels = labels.size();
 515                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 516
 517                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 518                                 char tempLabel[100];
 519                                 strcpy(tempLabel, (*it).c_str());
 520                                 int lengthLabel = (*it).length();
 521
 522                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 523                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 524                         }
 525                 }
 526
 527                 //make everyone wait
 528                 MPI_Barrier(MPI_COMM_WORLD);
 529
 530         #else
 531
 532                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 533                                 if(processors == 1){
 534                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 535                                 }else{
 536                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 537                                         dividedNames.resize(processors);
 538
 539                                         //for each file group figure out which process will complete it
 540                                         //want to divide the load intelligently so the big files are spread between processes
 541                                         for (int i = 0; i < distName.size(); i++) {
 542                                                 int processToAssign = (i+1) % processors;
 543                                                 if (processToAssign == 0) { processToAssign = processors; }
 544
 545                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 546                                         }
 547
 548                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 549                                         for (int i = 0; i < processors; i++) {
 550                                                 int remainder = ((i+1) % processors);
 551                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 552                                         }
 553
 554                                         createProcesses(dividedNames);
 555
 556                                         if (m->control_pressed) { return 0; }
 557
 558                                         //get list of list file names from each process
 559                                         for(int i=0;i<processors;i++){
 560                                                 string filename = toString(processIDS[i]) + ".temp";
 561                                                 ifstream in;
 562                                                 m->openInputFile(filename, in);
 563
 564                                                 in >> tag; m->gobble(in);
 565
 566                                                 while(!in.eof()) {
 567                                                         string tempName;
 568                                                         in >> tempName; m->gobble(in);
 569                                                         listFileNames.push_back(tempName);
 570                                                 }
 571                                                 in.close();
 572                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 573
 574                                                 //get labels
 575                                                 filename = toString(processIDS[i]) + ".temp.labels";
 576                                                 ifstream in2;
 577                                                 m->openInputFile(filename, in2);
 578
 579                                                 float tempCutoff;
 580                                                 in2 >> tempCutoff; m->gobble(in2);
 581                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 582
 583                                                 while(!in2.eof()) {
 584                                                         string tempName;
 585                                                         in2 >> tempName; m->gobble(in2);
 586                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 587                                                 }
 588                                                 in2.close();
 589                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 590                                         }
 591                                 }
 592                 #else
 593                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 594                 #endif
 595         #endif
 596                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 597
 598                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 599
 600                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 601
 602                 //****************** merge list file and create rabund and sabund files ******************************//
 603                 estart = time(NULL);
 604                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 605
 606                 #ifdef USE_MPI
 607                         if (pid == 0) { //only process 0 merges
 608                 #endif
 609
 610                 ListVector* listSingle;
 611                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 612
 613                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 614
 615                 mergeLists(listFileNames, labelBins, listSingle);
 616
 617                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 618
 619                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 620
 621                 m->mothurOutEndLine();
 622                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 623                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 624                 m->mothurOutEndLine();
 625
 626                 #ifdef USE_MPI
 627                         } //only process 0 merges
 628
 629                         //make everyone wait
 630                         MPI_Barrier(MPI_COMM_WORLD);
 631                 #endif
 632
 633                 return 0;
 634         }
 635         catch(exception& e) {
 636                 m->errorOut(e, "ClusterSplitCommand", "execute");
 637                 exit(1);
 638         }
 639 }
 640 //**********************************************************************************************************************
 641 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 642         try {
 643
 644                 map<float, int> labelBin;
 645                 vector<float> orderFloat;
 646                 int numSingleBins;
 647
 648                 //read in singletons
 649                 if (singleton != "none") {
 650                         ifstream in;
 651                         m->openInputFile(singleton, in);
 652
 653                         string firstCol, secondCol;
 654                         listSingle = new ListVector();
 655                         while (!in.eof()) {
 656                                 in >> firstCol >> secondCol; m->gobble(in);
 657                                 listSingle->push_back(secondCol);
 658                         }
 659                         in.close();
 660                         remove(singleton.c_str());
 661
 662                         numSingleBins = listSingle->getNumBins();
 663                 }else{  listSingle = NULL; numSingleBins = 0;  }
 664
 665                 //go through users set and make them floats so we can sort them
 666                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 667                         float temp = -10.0;
 668
 669                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 670                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 671
 672                         if (temp <= cutoff) {
 673                                 orderFloat.push_back(temp);
 674                                 labelBin[temp] = numSingleBins; //initialize numbins
 675                         }
 676                 }
 677
 678                 //sort order
 679                 sort(orderFloat.begin(), orderFloat.end());
 680                 userLabels.clear();
 681
 682                 //get the list info from each file
 683                 for (int k = 0; k < listNames.size(); k++) {
 684
 685                         if (m->control_pressed) {
 686                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 687                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 688                                 return labelBin;
 689                         }
 690
 691                         InputData* input = new InputData(listNames[k], "list");
 692                         ListVector* list = input->getListVector();
 693                         string lastLabel = list->getLabel();
 694
 695                         string filledInList = listNames[k] + "filledInTemp";
 696                         ofstream outFilled;
 697                         m->openOutputFile(filledInList, outFilled);
 698
 699                         //for each label needed
 700                         for(int l = 0; l < orderFloat.size(); l++){
 701
 702                                 string thisLabel;
 703                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 704                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 705
 706                                 //this file has reached the end
 707                                 if (list == NULL) {
 708                                         list = input->getListVector(lastLabel, true);
 709                                 }else{  //do you have the distance, or do you need to fill in
 710
 711                                         float labelFloat;
 712                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 713                                         else { convert(list->getLabel(), labelFloat); }
 714
 715                                         //check for missing labels
 716                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 717                                                 //if its bigger get last label, otherwise keep it
 718                                                 delete list;
 719                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 720                                         }
 721                                         lastLabel = list->getLabel();
 722                                 }
 723
 724                                 //print to new file
 725                                 list->setLabel(thisLabel);
 726                                 list->print(outFilled);
 727
 728                                 //update labelBin
 729                                 labelBin[orderFloat[l]] += list->getNumBins();
 730
 731                                 delete list;
 732
 733                                 list = input->getListVector();
 734                         }
 735
 736                         if (list != NULL) { delete list; }
 737                         delete input;
 738
 739                         outFilled.close();
 740                         remove(listNames[k].c_str());
 741                         rename(filledInList.c_str(), listNames[k].c_str());
 742                 }
 743
 744                 return labelBin;
 745         }
 746         catch(exception& e) {
 747                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 748                 exit(1);
 749         }
 750 }
 751 //**********************************************************************************************************************
 752 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 753         try {
 754                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 755                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 756
 757                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 758                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 759                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 760
 761                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 762                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 763                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 764
 765                 map<float, int>::iterator itLabel;
 766
 767                 //for each label needed
 768                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 769
 770                         string thisLabel;
 771                         if (itLabel->first == -1) { thisLabel = "unique"; }
 772                         else { thisLabel = toString(itLabel->first,  length-1);  }
 773
 774                         outList << thisLabel << '\t' << itLabel->second << '\t';
 775
 776                         RAbundVector* rabund = new RAbundVector();
 777                         rabund->setLabel(thisLabel);
 778
 779                         //add in singletons
 780                         if (listSingle != NULL) {
 781                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 782                                         outList << listSingle->get(j) << '\t';
 783                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 784                                 }
 785                         }
 786
 787                         //get the list info from each file
 788                         for (int k = 0; k < listNames.size(); k++) {
 789
 790                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 791
 792                                 InputData* input = new InputData(listNames[k], "list");
 793                                 ListVector* list = input->getListVector(thisLabel);
 794
 795                                 //this file has reached the end
 796                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 797                                 else {
 798                                         for (int j = 0; j < list->getNumBins(); j++) {
 799                                                 outList << list->get(j) << '\t';
 800                                                 rabund->push_back(m->getNumNames(list->get(j)));
 801                                         }
 802                                         delete list;
 803                                 }
 804                                 delete input;
 805                         }
 806
 807                         SAbundVector sabund = rabund->getSAbundVector();
 808
 809                         sabund.print(outSabund);
 810                         rabund->print(outRabund);
 811                         outList << endl;
 812
 813                         delete rabund;
 814                 }
 815
 816                 outList.close();
 817                 outRabund.close();
 818                 outSabund.close();
 819
 820                 if (listSingle != NULL) { delete listSingle;  }
 821
 822                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 823
 824                 return 0;
 825         }
 826         catch(exception& e) {
 827                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 828                 exit(1);
 829         }
 830 }
 831
 832 //**********************************************************************************************************************
 833
 834 void ClusterSplitCommand::printData(ListVector* oldList){
 835         try {
 836                 string label = oldList->getLabel();
 837                 RAbundVector oldRAbund = oldList->getRAbundVector();
 838
 839                 oldRAbund.setLabel(label);
 840                 if (m->isTrue(showabund)) {
 841                         oldRAbund.getSAbundVector().print(cout);
 842                 }
 843                 oldRAbund.print(outRabund);
 844                 oldRAbund.getSAbundVector().print(outSabund);
 845
 846                 oldList->print(outList);
 847         }
 848         catch(exception& e) {
 849                 m->errorOut(e, "ClusterSplitCommand", "printData");
 850                 exit(1);
 851         }
 852 }
 853 //**********************************************************************************************************************
 854 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 855         try {
 856
 857         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 858                 int process = 0;
 859                 int exitCommand = 1;
 860                 processIDS.clear();
 861
 862                 //loop through and create all the processes you want
 863                 while (process != processors) {
 864                         int pid = fork();
 865
 866                         if (pid > 0) {
 867                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 868                                 process++;
 869                         }else if (pid == 0){
 870                                 set<string> labels;
 871                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 872
 873                                 //write out names to file
 874                                 string filename = toString(getpid()) + ".temp";
 875                                 ofstream out;
 876                                 m->openOutputFile(filename, out);
 877                                 out << tag << endl;
 878                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 879                                 out.close();
 880
 881                                 //print out labels
 882                                 ofstream outLabels;
 883                                 filename = toString(getpid()) + ".temp.labels";
 884                                 m->openOutputFile(filename, outLabels);
 885
 886                                 outLabels << cutoff << endl;
 887                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 888                                         outLabels << (*it) << endl;
 889                                 }
 890                                 outLabels.close();
 891
 892                                 exit(0);
 893                         }else {
 894                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 895                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 896                                 exit(0);
 897                         }
 898                 }
 899
 900                 //force parent to wait until all the processes are done
 901                 for (int i=0;i<processors;i++) {
 902                         int temp = processIDS[i];
 903                         wait(&temp);
 904                 }
 905
 906                 return exitCommand;
 907         #endif
 908
 909         }
 910         catch(exception& e) {
 911                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 912                 exit(1);
 913         }
 914 }
 915 //**********************************************************************************************************************
 916
 917 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 918         try {
 919                 Cluster* cluster;
 920                 SparseMatrix* matrix;
 921                 ListVector* list;
 922                 ListVector oldList;
 923                 RAbundVector* rabund;
 924
 925                 vector<string> listFileNames;
 926
 927                 double smallestCutoff = cutoff;
 928
 929                 //cluster each distance file
 930                 for (int i = 0; i < distNames.size(); i++) {
 931                         if (m->control_pressed) { return listFileNames; }
 932
 933                         string thisNamefile = distNames[i].begin()->second;
 934                         string thisDistFile = distNames[i].begin()->first;
 935
 936                         //read in distance file
 937                         globaldata->setNameFile(thisNamefile);
 938                         globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
 939
 940                         #ifdef USE_MPI
 941                                 int pid;
 942                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 943
 944                                 //output your files too
 945                                 if (pid != 0) {
 946                                         cout << endl << "Reading " << thisDistFile << endl;
 947                                 }
 948                         #endif
 949
 950                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 951
 952                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 953                         read->setCutoff(cutoff);
 954
 955                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 956                         nameMap->readMap();
 957                         read->read(nameMap);
 958
 959                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
 960
 961                         list = read->getListVector();
 962                         oldList = *list;
 963                         matrix = read->getMatrix();
 964
 965                         delete read;
 966                         delete nameMap;
 967
 968
 969                         #ifdef USE_MPI
 970                                 //output your files too
 971                                 if (pid != 0) {
 972                                         cout << endl << "Clustering " << thisDistFile << endl;
 973                                 }
 974                         #endif
 975
 976                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
 977
 978                         rabund = new RAbundVector(list->getRAbundVector());
 979
 980                         //create cluster
 981                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
 982                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
 983                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
 984                         tag = cluster->getTag();
 985
 986                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
 987                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
 988
 989                         ofstream listFile;
 990                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
 991
 992                         listFileNames.push_back(fileroot+ tag + ".list");
 993
 994                         float previousDist = 0.00000;
 995                         float rndPreviousDist = 0.00000;
 996
 997                         oldList = *list;
 998
 999                         print_start = true;
1000                         start = time(NULL);
1001                         double saveCutoff = cutoff;
1002
1003                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1004
1005                                 if (m->control_pressed) { //clean up
1006                                         delete matrix; delete list;     delete cluster; delete rabund;
1007                                         listFile.close();
1008                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1009                                         listFileNames.clear(); return listFileNames;
1010                                 }
1011
1012                                 cluster->update(saveCutoff);
1013
1014                                 float dist = matrix->getSmallDist();
1015                                 float rndDist;
1016                                 if (hard) {
1017                                         rndDist = m->ceilDist(dist, precision);
1018                                 }else{
1019                                         rndDist = m->roundDist(dist, precision);
1020                                 }
1021
1022                                 if(previousDist <= 0.0000 && dist != previousDist){
1023                                         oldList.setLabel("unique");
1024                                         oldList.print(listFile);
1025                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1026                                 }
1027                                 else if(rndDist != rndPreviousDist){
1028                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1029                                         oldList.print(listFile);
1030                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1031                                 }
1032
1033                                 previousDist = dist;
1034                                 rndPreviousDist = rndDist;
1035                                 oldList = *list;
1036                         }
1037
1038
1039                         if(previousDist <= 0.0000){
1040                                 oldList.setLabel("unique");
1041                                 oldList.print(listFile);
1042                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1043                         }
1044                         else if(rndPreviousDist<cutoff){
1045                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1046                                 oldList.print(listFile);
1047                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1048                         }
1049
1050                         delete matrix; delete list;     delete cluster; delete rabund;
1051                         listFile.close();
1052
1053                         if (m->control_pressed) { //clean up
1054                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1055                                 listFileNames.clear(); return listFileNames;
1056                         }
1057
1058                         remove(thisDistFile.c_str());
1059                         remove(thisNamefile.c_str());
1060
1061                         if (saveCutoff != cutoff) {
1062                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1063                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1064
1065                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1066                         }
1067
1068                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1069                 }
1070
1071                 cutoff = smallestCutoff;
1072
1073                 return listFileNames;
1074
1075         }
1076         catch(exception& e) {
1077                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1078                 exit(1);
1079         }
1080
1081
1082 }
1083 //**********************************************************************************************************************
1084
1085 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1086         try{
1087
1088 #ifdef USE_MPI
1089                 int pid;
1090                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1091
1092                 if (pid != 0) {
1093 #endif
1094
1095                 string thisOutputDir = outputDir;
1096                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1097                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1098                 remove(outputFileName.c_str());
1099
1100
1101                 for (int i = 0; i < distNames.size(); i++) {
1102                         if (m->control_pressed) {  return 0; }
1103
1104                         string thisDistFile = distNames[i].begin()->first;
1105
1106                         m->appendFiles(thisDistFile, outputFileName);
1107                 }
1108
1109                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1110
1111 #ifdef USE_MPI
1112                 }
1113 #endif
1114
1115
1116
1117
1118         }
1119         catch(exception& e) {
1120                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1121                 exit(1);
1122         }
1123 }
1124 //**********************************************************************************************************************