clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::getValidParameters(){
  21         try {
  22                 string AlignArray[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  23                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
  24                 return myArray;
  25         }
  26         catch(exception& e) {
  27                 m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
  28                 exit(1);
  29         }
  30 }
  31 //**********************************************************************************************************************
  32 ClusterSplitCommand::ClusterSplitCommand(){
  33         try {
  34                 abort = true; calledHelp = true;
  35                 vector<string> tempOutNames;
  36                 outputTypes["list"] = tempOutNames;
  37                 outputTypes["rabund"] = tempOutNames;
  38                 outputTypes["sabund"] = tempOutNames;
  39                 outputTypes["column"] = tempOutNames;
  40         }
  41         catch(exception& e) {
  42                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  43                 exit(1);
  44         }
  45 }
  46 //**********************************************************************************************************************
  47 vector<string> ClusterSplitCommand::getRequiredParameters(){
  48         try {
  49                 string Array[] =  {"fasta","phylip","column","or"};
  50                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  51                 return myArray;
  52         }
  53         catch(exception& e) {
  54                 m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
  55                 exit(1);
  56         }
  57 }
  58 //**********************************************************************************************************************
  59 vector<string> ClusterSplitCommand::getRequiredFiles(){
  60         try {
  61                 vector<string> myArray;
  62                 return myArray;
  63         }
  64         catch(exception& e) {
  65                 m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
  66                 exit(1);
  67         }
  68 }
  69 //**********************************************************************************************************************
  70 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
  71 ClusterSplitCommand::ClusterSplitCommand(string option)  {
  72         try{
  73                 globaldata = GlobalData::getInstance();
  74                 abort = false; calledHelp = false;
  75                 format = "";
  76
  77                 //allow user to run help
  78                 if(option == "help") { help(); abort = true; calledHelp = true; }
  79
  80                 else {
  81                         //valid paramters for this command
  82                         string Array[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  83                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  84
  85                         OptionParser parser(option);
  86                         map<string,string> parameters = parser.getParameters();
  87
  88                         ValidParameters validParameter("cluster.split");
  89
  90                         //check to make sure all parameters are valid for command
  91                         map<string,string>::iterator it;
  92                         for (it = parameters.begin(); it != parameters.end(); it++) {
  93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
  94                                         abort = true;
  95                                 }
  96                         }
  97
  98                         //initialize outputTypes
  99                         vector<string> tempOutNames;
 100                         outputTypes["list"] = tempOutNames;
 101                         outputTypes["rabund"] = tempOutNames;
 102                         outputTypes["sabund"] = tempOutNames;
 103                         outputTypes["column"] = tempOutNames;
 104
 105                         globaldata->newRead();
 106
 107                         //if the user changes the output directory command factory will send this info to us in the output parameter
 108                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 109
 110                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 112                         if (inputDir == "not found"){   inputDir = "";          }
 113                         else {
 114                                 string path;
 115                                 it = parameters.find("phylip");
 116                                 //user has given a template file
 117                                 if(it != parameters.end()){
 118                                         path = m->hasPath(it->second);
 119                                         //if the user has not given a path then, add inputdir. else leave path alone.
 120                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 121                                 }
 122
 123                                 it = parameters.find("column");
 124                                 //user has given a template file
 125                                 if(it != parameters.end()){
 126                                         path = m->hasPath(it->second);
 127                                         //if the user has not given a path then, add inputdir. else leave path alone.
 128                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 129                                 }
 130
 131                                 it = parameters.find("name");
 132                                 //user has given a template file
 133                                 if(it != parameters.end()){
 134                                         path = m->hasPath(it->second);
 135                                         //if the user has not given a path then, add inputdir. else leave path alone.
 136                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 137                                 }
 138
 139                                 it = parameters.find("taxonomy");
 140                                 //user has given a template file
 141                                 if(it != parameters.end()){
 142                                         path = m->hasPath(it->second);
 143                                         //if the user has not given a path then, add inputdir. else leave path alone.
 144                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 145                                 }
 146
 147                                 it = parameters.find("fasta");
 148                                 //user has given a template file
 149                                 if(it != parameters.end()){
 150                                         path = m->hasPath(it->second);
 151                                         //if the user has not given a path then, add inputdir. else leave path alone.
 152                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 153                                 }
 154                         }
 155
 156                         //check for required parameters
 157                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 158                         if (phylipfile == "not open") { abort = true; }
 159                         else if (phylipfile == "not found") { phylipfile = ""; }
 160                         else {  distfile = phylipfile;  format = "phylip";      }
 161
 162                         columnfile = validParameter.validFile(parameters, "column", true);
 163                         if (columnfile == "not open") { abort = true; }
 164                         else if (columnfile == "not found") { columnfile = ""; }
 165                         else {  distfile = columnfile; format = "column";       }
 166
 167                         namefile = validParameter.validFile(parameters, "name", true);
 168                         if (namefile == "not open") { abort = true; }
 169                         else if (namefile == "not found") { namefile = ""; }
 170
 171                         fastafile = validParameter.validFile(parameters, "fasta", true);
 172                         if (fastafile == "not open") { abort = true; }
 173                         else if (fastafile == "not found") { fastafile = ""; }
 174                         else { distfile = fastafile;  splitmethod = "fasta";  }
 175
 176                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 177                         if (taxFile == "not open") { abort = true; }
 178                         else if (taxFile == "not found") { taxFile = ""; }
 179
 180                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); abort = true; }
 181                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 182
 183                         if (columnfile != "") {
 184                                 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
 185                         }
 186
 187                         if (fastafile != "") {
 188                                 if (taxFile == "") { m->mothurOut("You need to provide a taxonomy file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 189                                 if (namefile == "") { m->mothurOut("You need to provide a names file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 190                         }
 191
 192                         //check for optional parameter and set defaults
 193                         // ...at some point should added some additional type checking...
 194                         //get user cutoff and precision or use defaults
 195                         string temp;
 196                         temp = validParameter.validFile(parameters, "precision", false);
 197                         if (temp == "not found") { temp = "100"; }
 198                         //saves precision legnth for formatting below
 199                         length = temp.length();
 200                         convert(temp, precision);
 201
 202                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
 203                         hard = m->isTrue(temp);
 204
 205                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 206                         large = m->isTrue(temp);
 207
 208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
 209                         convert(temp, processors);
 210
 211                         temp = validParameter.validFile(parameters, "splitmethod", false);
 212                         if (splitmethod != "fasta") {
 213                                 if (temp == "not found")  { splitmethod = "distance"; }
 214                                 else {  splitmethod = temp; }
 215                         }
 216
 217                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 218                         convert(temp, cutoff);
 219                         cutoff += (5 / (precision * 10.0));
 220
 221                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 222                         convert(temp, taxLevelCutoff);
 223
 224                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
 225
 226                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 227                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 228
 229                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 230                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 231
 232                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 233
 234                         showabund = validParameter.validFile(parameters, "showabund", false);
 235                         if (showabund == "not found") { showabund = "T"; }
 236
 237                         timing = validParameter.validFile(parameters, "timing", false);
 238                         if (timing == "not found") { timing = "F"; }
 239
 240                 }
 241         }
 242         catch(exception& e) {
 243                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 244                 exit(1);
 245         }
 246 }
 247
 248 //**********************************************************************************************************************
 249
 250 void ClusterSplitCommand::help(){
 251         try {
 252                 m->mothurOut("The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n");
 253                 m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n");
 254                 m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n");
 255                 m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n");
 256                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n");
 257                 m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n");
 258                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n");
 259                 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
 260                 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file. \n");
 261                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
 262                 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
 263                 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
 264                 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
 265                 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
 266                 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
 267                 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
 268                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
 269                 #ifdef USE_MPI
 270                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
 271                 #endif
 272                 m->mothurOut("The cluster.split command should be in the following format: \n");
 273                 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
 274                 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");
 275
 276         }
 277         catch(exception& e) {
 278                 m->errorOut(e, "ClusterSplitCommand", "help");
 279                 exit(1);
 280         }
 281 }
 282
 283 //**********************************************************************************************************************
 284
 285 ClusterSplitCommand::~ClusterSplitCommand(){}
 286
 287 //**********************************************************************************************************************
 288
 289 int ClusterSplitCommand::execute(){
 290         try {
 291
 292                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 293
 294                 time_t estart;
 295                 vector<string> listFileNames;
 296                 set<string> labels;
 297                 string singletonName = "";
 298                 double saveCutoff = cutoff;
 299
 300                 //****************** file prep work ******************************//
 301                 #ifdef USE_MPI
 302                         int pid;
 303                         int tag = 2001;
 304                         MPI_Status status;
 305                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 306                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 307
 308                         if (pid == 0) { //only process 0 converts and splits
 309
 310                 #endif
 311
 312                 //if user gave a phylip file convert to column file
 313                 if (format == "phylip") {
 314                         estart = time(NULL);
 315                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 316
 317                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 318
 319                         NameAssignment* nameMap = NULL;
 320                         convert->setFormat("phylip");
 321                         convert->read(nameMap);
 322
 323                         if (m->control_pressed) {  delete convert;  return 0;  }
 324
 325                         distfile = convert->getOutputFile();
 326
 327                         //if no names file given with phylip file, create it
 328                         ListVector* listToMakeNameFile =  convert->getListVector();
 329                         if (namefile == "") {  //you need to make a namefile for split matrix
 330                                 ofstream out;
 331                                 namefile = phylipfile + ".names";
 332                                 m->openOutputFile(namefile, out);
 333                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 334                                         string bin = listToMakeNameFile->get(i);
 335                                         out << bin << '\t' << bin << endl;
 336                                 }
 337                                 out.close();
 338                         }
 339                         delete listToMakeNameFile;
 340                         delete convert;
 341
 342                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 343                 }
 344                 if (m->control_pressed) { return 0; }
 345
 346                 estart = time(NULL);
 347                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 348
 349                 //split matrix into non-overlapping groups
 350                 SplitMatrix* split;
 351                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 352                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 353                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 354                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 355
 356                 split->split();
 357
 358                 if (m->control_pressed) { delete split; return 0; }
 359
 360                 singletonName = split->getSingletonNames();
 361                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 362                 delete split;
 363
 364                 //output a merged distance file
 365                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 366
 367
 368                 if (m->control_pressed) { return 0; }
 369
 370                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 371                 estart = time(NULL);
 372
 373                 //****************** break up files between processes and cluster each file set ******************************//
 374         #ifdef USE_MPI
 375                         ////you are process 0 from above////
 376
 377                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 378                         dividedNames.resize(processors);
 379
 380                         //for each file group figure out which process will complete it
 381                         //want to divide the load intelligently so the big files are spread between processes
 382                         for (int i = 0; i < distName.size(); i++) {
 383                                 int processToAssign = (i+1) % processors;
 384                                 if (processToAssign == 0) { processToAssign = processors; }
 385
 386                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 387                         }
 388
 389                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 390                         for (int i = 0; i < processors; i++) {
 391                                 int remainder = ((i+1) % processors);
 392                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 393                         }
 394
 395
 396                         //send each child the list of files it needs to process
 397                         for(int i = 1; i < processors; i++) {
 398                                 //send number of file pairs
 399                                 int num = dividedNames[i].size();
 400                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 401
 402                                 for (int j = 0; j < num; j++) { //send filenames to process i
 403                                         char tempDistFileName[1024];
 404                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 405                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 406
 407                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 408                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 409
 410                                         char tempNameFileName[1024];
 411                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 412                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 413
 414                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 415                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 416                                 }
 417                         }
 418
 419                         //process your share
 420                         listFileNames = cluster(dividedNames[0], labels);
 421
 422                         //receive the other processes info
 423                         for(int i = 1; i < processors; i++) {
 424                                 int num = dividedNames[i].size();
 425
 426                                 double tempCutoff;
 427                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 428                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 429
 430                                 //send list filenames to root process
 431                                 for (int j = 0; j < num; j++) {
 432                                         int lengthList = 0;
 433                                         char tempListFileName[1024];
 434
 435                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 436                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 437
 438                                         string myListFileName = tempListFileName;
 439                                         myListFileName = myListFileName.substr(0, lengthList);
 440
 441                                         listFileNames.push_back(myListFileName);
 442                                 }
 443
 444                                 //send Labels to root process
 445                                 int numLabels = 0;
 446                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 447
 448                                 for (int j = 0; j < numLabels; j++) {
 449                                         int lengthLabel = 0;
 450                                         char tempLabel[100];
 451
 452                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 453                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 454
 455                                         string myLabel = tempLabel;
 456                                         myLabel = myLabel.substr(0, lengthLabel);
 457
 458                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 459                                 }
 460                         }
 461
 462                 }else { //you are a child process
 463                         vector < map<string, string> >  myNames;
 464
 465                         //recieve the files you need to process
 466                         //receive number of file pairs
 467                         int num = 0;
 468                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 469
 470                         myNames.resize(num);
 471
 472                         for (int j = 0; j < num; j++) { //receive filenames to process
 473                                 int lengthDist = 0;
 474                                 char tempDistFileName[1024];
 475
 476                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 477                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 478
 479                                 string myDistFileName = tempDistFileName;
 480                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 481
 482                                 int lengthName = 0;
 483                                 char tempNameFileName[1024];
 484
 485                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 486                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 487
 488                                 string myNameFileName = tempNameFileName;
 489                                 myNameFileName = myNameFileName.substr(0, lengthName);
 490
 491                                 //save file name
 492                                 myNames[j][myDistFileName] = myNameFileName;
 493                         }
 494
 495                         //process them
 496                         listFileNames = cluster(myNames, labels);
 497
 498                         //send cutoff
 499                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 500
 501                         //send list filenames to root process
 502                         for (int j = 0; j < num; j++) {
 503                                 char tempListFileName[1024];
 504                                 strcpy(tempListFileName, listFileNames[j].c_str());
 505                                 int lengthList = listFileNames[j].length();
 506
 507                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 508                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 509                         }
 510
 511                         //send Labels to root process
 512                         int numLabels = labels.size();
 513                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 514
 515                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 516                                 char tempLabel[100];
 517                                 strcpy(tempLabel, (*it).c_str());
 518                                 int lengthLabel = (*it).length();
 519
 520                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 521                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 522                         }
 523                 }
 524
 525                 //make everyone wait
 526                 MPI_Barrier(MPI_COMM_WORLD);
 527
 528         #else
 529
 530                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 531                                 if(processors == 1){
 532                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 533                                 }else{
 534                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 535                                         dividedNames.resize(processors);
 536
 537                                         //for each file group figure out which process will complete it
 538                                         //want to divide the load intelligently so the big files are spread between processes
 539                                         for (int i = 0; i < distName.size(); i++) {
 540                                                 int processToAssign = (i+1) % processors;
 541                                                 if (processToAssign == 0) { processToAssign = processors; }
 542
 543                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 544                                         }
 545
 546                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 547                                         for (int i = 0; i < processors; i++) {
 548                                                 int remainder = ((i+1) % processors);
 549                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 550                                         }
 551
 552                                         createProcesses(dividedNames);
 553
 554                                         if (m->control_pressed) { return 0; }
 555
 556                                         //get list of list file names from each process
 557                                         for(int i=0;i<processors;i++){
 558                                                 string filename = toString(processIDS[i]) + ".temp";
 559                                                 ifstream in;
 560                                                 m->openInputFile(filename, in);
 561
 562                                                 in >> tag; m->gobble(in);
 563
 564                                                 while(!in.eof()) {
 565                                                         string tempName;
 566                                                         in >> tempName; m->gobble(in);
 567                                                         listFileNames.push_back(tempName);
 568                                                 }
 569                                                 in.close();
 570                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 571
 572                                                 //get labels
 573                                                 filename = toString(processIDS[i]) + ".temp.labels";
 574                                                 ifstream in2;
 575                                                 m->openInputFile(filename, in2);
 576
 577                                                 float tempCutoff;
 578                                                 in2 >> tempCutoff; m->gobble(in2);
 579                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 580
 581                                                 while(!in2.eof()) {
 582                                                         string tempName;
 583                                                         in2 >> tempName; m->gobble(in2);
 584                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 585                                                 }
 586                                                 in2.close();
 587                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 588                                         }
 589                                 }
 590                 #else
 591                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 592                 #endif
 593         #endif
 594                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 595
 596                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 597
 598                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 599
 600                 //****************** merge list file and create rabund and sabund files ******************************//
 601                 estart = time(NULL);
 602                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 603
 604                 #ifdef USE_MPI
 605                         if (pid == 0) { //only process 0 merges
 606                 #endif
 607
 608                 ListVector* listSingle;
 609                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 610
 611                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 612
 613                 mergeLists(listFileNames, labelBins, listSingle);
 614
 615                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 616
 617                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 618
 619                 m->mothurOutEndLine();
 620                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 621                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 622                 m->mothurOutEndLine();
 623
 624                 #ifdef USE_MPI
 625                         } //only process 0 merges
 626
 627                         //make everyone wait
 628                         MPI_Barrier(MPI_COMM_WORLD);
 629                 #endif
 630
 631                 return 0;
 632         }
 633         catch(exception& e) {
 634                 m->errorOut(e, "ClusterSplitCommand", "execute");
 635                 exit(1);
 636         }
 637 }
 638 //**********************************************************************************************************************
 639 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 640         try {
 641
 642                 map<float, int> labelBin;
 643                 vector<float> orderFloat;
 644                 int numSingleBins;
 645
 646                 //read in singletons
 647                 if (singleton != "none") {
 648                         ifstream in;
 649                         m->openInputFile(singleton, in);
 650
 651                         string firstCol, secondCol;
 652                         listSingle = new ListVector();
 653                         while (!in.eof()) {
 654                                 in >> firstCol >> secondCol; m->gobble(in);
 655                                 listSingle->push_back(secondCol);
 656                         }
 657                         in.close();
 658                         remove(singleton.c_str());
 659
 660                         numSingleBins = listSingle->getNumBins();
 661                 }else{  listSingle = NULL; numSingleBins = 0;  }
 662
 663                 //go through users set and make them floats so we can sort them
 664                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 665                         float temp = -10.0;
 666
 667                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 668                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 669
 670                         if (temp <= cutoff) {
 671                                 orderFloat.push_back(temp);
 672                                 labelBin[temp] = numSingleBins; //initialize numbins
 673                         }
 674                 }
 675
 676                 //sort order
 677                 sort(orderFloat.begin(), orderFloat.end());
 678                 userLabels.clear();
 679
 680                 //get the list info from each file
 681                 for (int k = 0; k < listNames.size(); k++) {
 682
 683                         if (m->control_pressed) {
 684                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 685                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 686                                 return labelBin;
 687                         }
 688
 689                         InputData* input = new InputData(listNames[k], "list");
 690                         ListVector* list = input->getListVector();
 691                         string lastLabel = list->getLabel();
 692
 693                         string filledInList = listNames[k] + "filledInTemp";
 694                         ofstream outFilled;
 695                         m->openOutputFile(filledInList, outFilled);
 696
 697                         //for each label needed
 698                         for(int l = 0; l < orderFloat.size(); l++){
 699
 700                                 string thisLabel;
 701                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 702                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 703
 704                                 //this file has reached the end
 705                                 if (list == NULL) {
 706                                         list = input->getListVector(lastLabel, true);
 707                                 }else{  //do you have the distance, or do you need to fill in
 708
 709                                         float labelFloat;
 710                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 711                                         else { convert(list->getLabel(), labelFloat); }
 712
 713                                         //check for missing labels
 714                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 715                                                 //if its bigger get last label, otherwise keep it
 716                                                 delete list;
 717                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 718                                         }
 719                                         lastLabel = list->getLabel();
 720                                 }
 721
 722                                 //print to new file
 723                                 list->setLabel(thisLabel);
 724                                 list->print(outFilled);
 725
 726                                 //update labelBin
 727                                 labelBin[orderFloat[l]] += list->getNumBins();
 728
 729                                 delete list;
 730
 731                                 list = input->getListVector();
 732                         }
 733
 734                         if (list != NULL) { delete list; }
 735                         delete input;
 736
 737                         outFilled.close();
 738                         remove(listNames[k].c_str());
 739                         rename(filledInList.c_str(), listNames[k].c_str());
 740                 }
 741
 742                 return labelBin;
 743         }
 744         catch(exception& e) {
 745                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 746                 exit(1);
 747         }
 748 }
 749 //**********************************************************************************************************************
 750 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 751         try {
 752                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 753                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 754
 755                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 756                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 757                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 758
 759                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 760                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 761                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 762
 763                 map<float, int>::iterator itLabel;
 764
 765                 //for each label needed
 766                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 767
 768                         string thisLabel;
 769                         if (itLabel->first == -1) { thisLabel = "unique"; }
 770                         else { thisLabel = toString(itLabel->first,  length-1);  }
 771
 772                         outList << thisLabel << '\t' << itLabel->second << '\t';
 773
 774                         RAbundVector* rabund = new RAbundVector();
 775                         rabund->setLabel(thisLabel);
 776
 777                         //add in singletons
 778                         if (listSingle != NULL) {
 779                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 780                                         outList << listSingle->get(j) << '\t';
 781                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 782                                 }
 783                         }
 784
 785                         //get the list info from each file
 786                         for (int k = 0; k < listNames.size(); k++) {
 787
 788                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 789
 790                                 InputData* input = new InputData(listNames[k], "list");
 791                                 ListVector* list = input->getListVector(thisLabel);
 792
 793                                 //this file has reached the end
 794                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 795                                 else {
 796                                         for (int j = 0; j < list->getNumBins(); j++) {
 797                                                 outList << list->get(j) << '\t';
 798                                                 rabund->push_back(m->getNumNames(list->get(j)));
 799                                         }
 800                                         delete list;
 801                                 }
 802                                 delete input;
 803                         }
 804
 805                         SAbundVector sabund = rabund->getSAbundVector();
 806
 807                         sabund.print(outSabund);
 808                         rabund->print(outRabund);
 809                         outList << endl;
 810
 811                         delete rabund;
 812                 }
 813
 814                 outList.close();
 815                 outRabund.close();
 816                 outSabund.close();
 817
 818                 if (listSingle != NULL) { delete listSingle;  }
 819
 820                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 821
 822                 return 0;
 823         }
 824         catch(exception& e) {
 825                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 826                 exit(1);
 827         }
 828 }
 829
 830 //**********************************************************************************************************************
 831
 832 void ClusterSplitCommand::printData(ListVector* oldList){
 833         try {
 834                 string label = oldList->getLabel();
 835                 RAbundVector oldRAbund = oldList->getRAbundVector();
 836
 837                 oldRAbund.setLabel(label);
 838                 if (m->isTrue(showabund)) {
 839                         oldRAbund.getSAbundVector().print(cout);
 840                 }
 841                 oldRAbund.print(outRabund);
 842                 oldRAbund.getSAbundVector().print(outSabund);
 843
 844                 oldList->print(outList);
 845         }
 846         catch(exception& e) {
 847                 m->errorOut(e, "ClusterSplitCommand", "printData");
 848                 exit(1);
 849         }
 850 }
 851 //**********************************************************************************************************************
 852 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 853         try {
 854
 855         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 856                 int process = 0;
 857                 int exitCommand = 1;
 858                 processIDS.clear();
 859
 860                 //loop through and create all the processes you want
 861                 while (process != processors) {
 862                         int pid = fork();
 863
 864                         if (pid > 0) {
 865                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 866                                 process++;
 867                         }else if (pid == 0){
 868                                 set<string> labels;
 869                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 870
 871                                 //write out names to file
 872                                 string filename = toString(getpid()) + ".temp";
 873                                 ofstream out;
 874                                 m->openOutputFile(filename, out);
 875                                 out << tag << endl;
 876                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 877                                 out.close();
 878
 879                                 //print out labels
 880                                 ofstream outLabels;
 881                                 filename = toString(getpid()) + ".temp.labels";
 882                                 m->openOutputFile(filename, outLabels);
 883
 884                                 outLabels << cutoff << endl;
 885                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 886                                         outLabels << (*it) << endl;
 887                                 }
 888                                 outLabels.close();
 889
 890                                 exit(0);
 891                         }else {
 892                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 893                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 894                                 exit(0);
 895                         }
 896                 }
 897
 898                 //force parent to wait until all the processes are done
 899                 for (int i=0;i<processors;i++) {
 900                         int temp = processIDS[i];
 901                         wait(&temp);
 902                 }
 903
 904                 return exitCommand;
 905         #endif
 906
 907         }
 908         catch(exception& e) {
 909                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 910                 exit(1);
 911         }
 912 }
 913 //**********************************************************************************************************************
 914
 915 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 916         try {
 917                 Cluster* cluster;
 918                 SparseMatrix* matrix;
 919                 ListVector* list;
 920                 ListVector oldList;
 921                 RAbundVector* rabund;
 922
 923                 vector<string> listFileNames;
 924
 925                 double smallestCutoff = cutoff;
 926
 927                 //cluster each distance file
 928                 for (int i = 0; i < distNames.size(); i++) {
 929                         if (m->control_pressed) { return listFileNames; }
 930
 931                         string thisNamefile = distNames[i].begin()->second;
 932                         string thisDistFile = distNames[i].begin()->first;
 933
 934                         //read in distance file
 935                         globaldata->setNameFile(thisNamefile);
 936                         globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
 937
 938                         #ifdef USE_MPI
 939                                 int pid;
 940                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 941
 942                                 //output your files too
 943                                 if (pid != 0) {
 944                                         cout << endl << "Reading " << thisDistFile << endl;
 945                                 }
 946                         #endif
 947
 948                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 949
 950                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 951                         read->setCutoff(cutoff);
 952
 953                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 954                         nameMap->readMap();
 955                         read->read(nameMap);
 956
 957                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
 958
 959                         list = read->getListVector();
 960                         oldList = *list;
 961                         matrix = read->getMatrix();
 962
 963                         delete read;
 964                         delete nameMap;
 965
 966
 967                         #ifdef USE_MPI
 968                                 //output your files too
 969                                 if (pid != 0) {
 970                                         cout << endl << "Clustering " << thisDistFile << endl;
 971                                 }
 972                         #endif
 973
 974                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
 975
 976                         rabund = new RAbundVector(list->getRAbundVector());
 977
 978                         //create cluster
 979                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
 980                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
 981                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
 982                         tag = cluster->getTag();
 983
 984                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
 985                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
 986
 987                         ofstream listFile;
 988                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
 989
 990                         listFileNames.push_back(fileroot+ tag + ".list");
 991
 992                         float previousDist = 0.00000;
 993                         float rndPreviousDist = 0.00000;
 994
 995                         oldList = *list;
 996
 997                         print_start = true;
 998                         start = time(NULL);
 999                         double saveCutoff = cutoff;
1000
1001                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1002
1003                                 if (m->control_pressed) { //clean up
1004                                         delete matrix; delete list;     delete cluster; delete rabund;
1005                                         listFile.close();
1006                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1007                                         listFileNames.clear(); return listFileNames;
1008                                 }
1009
1010                                 cluster->update(saveCutoff);
1011
1012                                 float dist = matrix->getSmallDist();
1013                                 float rndDist;
1014                                 if (hard) {
1015                                         rndDist = m->ceilDist(dist, precision);
1016                                 }else{
1017                                         rndDist = m->roundDist(dist, precision);
1018                                 }
1019
1020                                 if(previousDist <= 0.0000 && dist != previousDist){
1021                                         oldList.setLabel("unique");
1022                                         oldList.print(listFile);
1023                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1024                                 }
1025                                 else if(rndDist != rndPreviousDist){
1026                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1027                                         oldList.print(listFile);
1028                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1029                                 }
1030
1031                                 previousDist = dist;
1032                                 rndPreviousDist = rndDist;
1033                                 oldList = *list;
1034                         }
1035
1036
1037                         if(previousDist <= 0.0000){
1038                                 oldList.setLabel("unique");
1039                                 oldList.print(listFile);
1040                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1041                         }
1042                         else if(rndPreviousDist<cutoff){
1043                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1044                                 oldList.print(listFile);
1045                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1046                         }
1047
1048                         delete matrix; delete list;     delete cluster; delete rabund;
1049                         listFile.close();
1050
1051                         if (m->control_pressed) { //clean up
1052                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1053                                 listFileNames.clear(); return listFileNames;
1054                         }
1055
1056                         remove(thisDistFile.c_str());
1057                         remove(thisNamefile.c_str());
1058
1059                         if (saveCutoff != cutoff) {
1060                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1061                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1062
1063                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1064                         }
1065
1066                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1067                 }
1068
1069                 cutoff = smallestCutoff;
1070
1071                 return listFileNames;
1072
1073         }
1074         catch(exception& e) {
1075                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1076                 exit(1);
1077         }
1078
1079
1080 }
1081 //**********************************************************************************************************************
1082
1083 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1084         try{
1085
1086 #ifdef USE_MPI
1087                 int pid;
1088                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1089
1090                 if (pid != 0) {
1091 #endif
1092
1093                 string thisOutputDir = outputDir;
1094                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1095                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1096                 remove(outputFileName.c_str());
1097
1098
1099                 for (int i = 0; i < distNames.size(); i++) {
1100                         if (m->control_pressed) {  return 0; }
1101
1102                         string thisDistFile = distNames[i].begin()->first;
1103
1104                         m->appendFiles(thisDistFile, outputFileName);
1105                 }
1106
1107                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1108
1109 #ifdef USE_MPI
1110                 }
1111 #endif
1112
1113                 return 0;
1114
1115
1116         }
1117         catch(exception& e) {
1118                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1119                 exit(1);
1120         }
1121 }
1122 //**********************************************************************************************************************