clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::getValidParameters(){
  21         try {
  22                 string AlignArray[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  23                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
  24                 return myArray;
  25         }
  26         catch(exception& e) {
  27                 m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
  28                 exit(1);
  29         }
  30 }
  31 //**********************************************************************************************************************
  32 ClusterSplitCommand::ClusterSplitCommand(){
  33         try {
  34                 abort = true;
  35                 //initialize outputTypes
  36                 vector<string> tempOutNames;
  37                 outputTypes["list"] = tempOutNames;
  38                 outputTypes["rabund"] = tempOutNames;
  39                 outputTypes["sabund"] = tempOutNames;
  40                 outputTypes["column"] = tempOutNames;
  41         }
  42         catch(exception& e) {
  43                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  44                 exit(1);
  45         }
  46 }
  47 //**********************************************************************************************************************
  48 vector<string> ClusterSplitCommand::getRequiredParameters(){
  49         try {
  50                 string Array[] =  {"fasta","phylip","column","or"};
  51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  52                 return myArray;
  53         }
  54         catch(exception& e) {
  55                 m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
  56                 exit(1);
  57         }
  58 }
  59 //**********************************************************************************************************************
  60 vector<string> ClusterSplitCommand::getRequiredFiles(){
  61         try {
  62                 vector<string> myArray;
  63                 return myArray;
  64         }
  65         catch(exception& e) {
  66                 m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
  67                 exit(1);
  68         }
  69 }
  70 //**********************************************************************************************************************
  71 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
  72 ClusterSplitCommand::ClusterSplitCommand(string option)  {
  73         try{
  74                 globaldata = GlobalData::getInstance();
  75                 abort = false;
  76                 format = "";
  77
  78                 //allow user to run help
  79                 if(option == "help") { help(); abort = true; }
  80
  81                 else {
  82                         //valid paramters for this command
  83                         string Array[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  84                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  85
  86                         OptionParser parser(option);
  87                         map<string,string> parameters = parser.getParameters();
  88
  89                         ValidParameters validParameter("cluster.split");
  90
  91                         //check to make sure all parameters are valid for command
  92                         map<string,string>::iterator it;
  93                         for (it = parameters.begin(); it != parameters.end(); it++) {
  94                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
  95                                         abort = true;
  96                                 }
  97                         }
  98
  99                         //initialize outputTypes
 100                         vector<string> tempOutNames;
 101                         outputTypes["list"] = tempOutNames;
 102                         outputTypes["rabund"] = tempOutNames;
 103                         outputTypes["sabund"] = tempOutNames;
 104                         outputTypes["column"] = tempOutNames;
 105
 106                         globaldata->newRead();
 107
 108                         //if the user changes the output directory command factory will send this info to us in the output parameter
 109                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 110
 111                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 113                         if (inputDir == "not found"){   inputDir = "";          }
 114                         else {
 115                                 string path;
 116                                 it = parameters.find("phylip");
 117                                 //user has given a template file
 118                                 if(it != parameters.end()){
 119                                         path = m->hasPath(it->second);
 120                                         //if the user has not given a path then, add inputdir. else leave path alone.
 121                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 122                                 }
 123
 124                                 it = parameters.find("column");
 125                                 //user has given a template file
 126                                 if(it != parameters.end()){
 127                                         path = m->hasPath(it->second);
 128                                         //if the user has not given a path then, add inputdir. else leave path alone.
 129                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 130                                 }
 131
 132                                 it = parameters.find("name");
 133                                 //user has given a template file
 134                                 if(it != parameters.end()){
 135                                         path = m->hasPath(it->second);
 136                                         //if the user has not given a path then, add inputdir. else leave path alone.
 137                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 138                                 }
 139
 140                                 it = parameters.find("taxonomy");
 141                                 //user has given a template file
 142                                 if(it != parameters.end()){
 143                                         path = m->hasPath(it->second);
 144                                         //if the user has not given a path then, add inputdir. else leave path alone.
 145                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 146                                 }
 147
 148                                 it = parameters.find("fasta");
 149                                 //user has given a template file
 150                                 if(it != parameters.end()){
 151                                         path = m->hasPath(it->second);
 152                                         //if the user has not given a path then, add inputdir. else leave path alone.
 153                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 154                                 }
 155                         }
 156
 157                         //check for required parameters
 158                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 159                         if (phylipfile == "not open") { abort = true; }
 160                         else if (phylipfile == "not found") { phylipfile = ""; }
 161                         else {  distfile = phylipfile;  format = "phylip";      }
 162
 163                         columnfile = validParameter.validFile(parameters, "column", true);
 164                         if (columnfile == "not open") { abort = true; }
 165                         else if (columnfile == "not found") { columnfile = ""; }
 166                         else {  distfile = columnfile; format = "column";       }
 167
 168                         namefile = validParameter.validFile(parameters, "name", true);
 169                         if (namefile == "not open") { abort = true; }
 170                         else if (namefile == "not found") { namefile = ""; }
 171
 172                         fastafile = validParameter.validFile(parameters, "fasta", true);
 173                         if (fastafile == "not open") { abort = true; }
 174                         else if (fastafile == "not found") { fastafile = ""; }
 175                         else { distfile = fastafile;  splitmethod = "fasta";  }
 176
 177                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 178                         if (taxFile == "not open") { abort = true; }
 179                         else if (taxFile == "not found") { taxFile = ""; }
 180
 181                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); abort = true; }
 182                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 183
 184                         if (columnfile != "") {
 185                                 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
 186                         }
 187
 188                         if (fastafile != "") {
 189                                 if (taxFile == "") { m->mothurOut("You need to provide a taxonomy file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 190                                 if (namefile == "") { m->mothurOut("You need to provide a names file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 191                         }
 192
 193                         //check for optional parameter and set defaults
 194                         // ...at some point should added some additional type checking...
 195                         //get user cutoff and precision or use defaults
 196                         string temp;
 197                         temp = validParameter.validFile(parameters, "precision", false);
 198                         if (temp == "not found") { temp = "100"; }
 199                         //saves precision legnth for formatting below
 200                         length = temp.length();
 201                         convert(temp, precision);
 202
 203                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
 204                         hard = m->isTrue(temp);
 205
 206                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 207                         large = m->isTrue(temp);
 208
 209                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
 210                         convert(temp, processors);
 211
 212                         temp = validParameter.validFile(parameters, "splitmethod", false);
 213                         if (splitmethod != "fasta") {
 214                                 if (temp == "not found")  { splitmethod = "distance"; }
 215                                 else {  splitmethod = temp; }
 216                         }
 217
 218                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 219                         convert(temp, cutoff);
 220                         cutoff += (5 / (precision * 10.0));
 221
 222                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 223                         convert(temp, taxLevelCutoff);
 224
 225                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
 226
 227                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 228                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 229
 230                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 231                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 232
 233                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 234
 235                         showabund = validParameter.validFile(parameters, "showabund", false);
 236                         if (showabund == "not found") { showabund = "T"; }
 237
 238                         timing = validParameter.validFile(parameters, "timing", false);
 239                         if (timing == "not found") { timing = "F"; }
 240
 241                 }
 242         }
 243         catch(exception& e) {
 244                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 245                 exit(1);
 246         }
 247 }
 248
 249 //**********************************************************************************************************************
 250
 251 void ClusterSplitCommand::help(){
 252         try {
 253                 m->mothurOut("The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n");
 254                 m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n");
 255                 m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n");
 256                 m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n");
 257                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n");
 258                 m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n");
 259                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n");
 260                 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
 261                 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file. \n");
 262                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
 263                 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
 264                 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
 265                 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
 266                 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
 267                 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
 268                 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
 269                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
 270                 #ifdef USE_MPI
 271                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
 272                 #endif
 273                 m->mothurOut("The cluster.split command should be in the following format: \n");
 274                 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
 275                 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");
 276
 277         }
 278         catch(exception& e) {
 279                 m->errorOut(e, "ClusterSplitCommand", "help");
 280                 exit(1);
 281         }
 282 }
 283
 284 //**********************************************************************************************************************
 285
 286 ClusterSplitCommand::~ClusterSplitCommand(){}
 287
 288 //**********************************************************************************************************************
 289
 290 int ClusterSplitCommand::execute(){
 291         try {
 292
 293                 if (abort == true) {    return 0;       }
 294
 295                 time_t estart;
 296                 vector<string> listFileNames;
 297                 set<string> labels;
 298                 string singletonName = "";
 299                 double saveCutoff = cutoff;
 300
 301                 //****************** file prep work ******************************//
 302                 #ifdef USE_MPI
 303                         int pid;
 304                         int tag = 2001;
 305                         MPI_Status status;
 306                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 307                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 308
 309                         if (pid == 0) { //only process 0 converts and splits
 310
 311                 #endif
 312
 313                 //if user gave a phylip file convert to column file
 314                 if (format == "phylip") {
 315                         estart = time(NULL);
 316                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 317
 318                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 319
 320                         NameAssignment* nameMap = NULL;
 321                         convert->setFormat("phylip");
 322                         convert->read(nameMap);
 323
 324                         if (m->control_pressed) {  delete convert;  return 0;  }
 325
 326                         distfile = convert->getOutputFile();
 327
 328                         //if no names file given with phylip file, create it
 329                         ListVector* listToMakeNameFile =  convert->getListVector();
 330                         if (namefile == "") {  //you need to make a namefile for split matrix
 331                                 ofstream out;
 332                                 namefile = phylipfile + ".names";
 333                                 m->openOutputFile(namefile, out);
 334                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 335                                         string bin = listToMakeNameFile->get(i);
 336                                         out << bin << '\t' << bin << endl;
 337                                 }
 338                                 out.close();
 339                         }
 340                         delete listToMakeNameFile;
 341                         delete convert;
 342
 343                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 344                 }
 345                 if (m->control_pressed) { return 0; }
 346
 347                 estart = time(NULL);
 348                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 349
 350                 //split matrix into non-overlapping groups
 351                 SplitMatrix* split;
 352                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 353                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 354                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 355                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 356
 357                 split->split();
 358
 359                 if (m->control_pressed) { delete split; return 0; }
 360
 361                 singletonName = split->getSingletonNames();
 362                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 363                 delete split;
 364
 365                 //output a merged distance file
 366                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 367
 368
 369                 if (m->control_pressed) { return 0; }
 370
 371                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 372                 estart = time(NULL);
 373
 374                 //****************** break up files between processes and cluster each file set ******************************//
 375         #ifdef USE_MPI
 376                         ////you are process 0 from above////
 377
 378                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 379                         dividedNames.resize(processors);
 380
 381                         //for each file group figure out which process will complete it
 382                         //want to divide the load intelligently so the big files are spread between processes
 383                         int count = 1;
 384                         for (int i = 0; i < distName.size(); i++) {
 385                                 int processToAssign = (i+1) % processors;
 386                                 if (processToAssign == 0) { processToAssign = processors; }
 387
 388                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 389                         }
 390
 391                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 392                         for (int i = 0; i < processors; i++) {
 393                                 int remainder = ((i+1) % processors);
 394                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 395                         }
 396
 397
 398                         //send each child the list of files it needs to process
 399                         for(int i = 1; i < processors; i++) {
 400                                 //send number of file pairs
 401                                 int num = dividedNames[i].size();
 402                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 403
 404                                 for (int j = 0; j < num; j++) { //send filenames to process i
 405                                         char tempDistFileName[1024];
 406                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 407                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 408
 409                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 410                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 411
 412                                         char tempNameFileName[1024];
 413                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 414                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 415
 416                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 417                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 418                                 }
 419                         }
 420
 421                         //process your share
 422                         listFileNames = cluster(dividedNames[0], labels);
 423
 424                         //receive the other processes info
 425                         for(int i = 1; i < processors; i++) {
 426                                 int num = dividedNames[i].size();
 427
 428                                 double tempCutoff;
 429                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 430                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 431
 432                                 //send list filenames to root process
 433                                 for (int j = 0; j < num; j++) {
 434                                         int lengthList = 0;
 435                                         char tempListFileName[1024];
 436
 437                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 438                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 439
 440                                         string myListFileName = tempListFileName;
 441                                         myListFileName = myListFileName.substr(0, lengthList);
 442
 443                                         listFileNames.push_back(myListFileName);
 444                                 }
 445
 446                                 //send Labels to root process
 447                                 int numLabels = 0;
 448                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 449
 450                                 for (int j = 0; j < numLabels; j++) {
 451                                         int lengthLabel = 0;
 452                                         char tempLabel[100];
 453
 454                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 455                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 456
 457                                         string myLabel = tempLabel;
 458                                         myLabel = myLabel.substr(0, lengthLabel);
 459
 460                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 461                                 }
 462                         }
 463
 464                 }else { //you are a child process
 465                         vector < map<string, string> >  myNames;
 466
 467                         //recieve the files you need to process
 468                         //receive number of file pairs
 469                         int num = 0;
 470                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 471
 472                         myNames.resize(num);
 473
 474                         for (int j = 0; j < num; j++) { //receive filenames to process
 475                                 int lengthDist = 0;
 476                                 char tempDistFileName[1024];
 477
 478                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 479                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 480
 481                                 string myDistFileName = tempDistFileName;
 482                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 483
 484                                 int lengthName = 0;
 485                                 char tempNameFileName[1024];
 486
 487                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 488                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 489
 490                                 string myNameFileName = tempNameFileName;
 491                                 myNameFileName = myNameFileName.substr(0, lengthName);
 492
 493                                 //save file name
 494                                 myNames[j][myDistFileName] = myNameFileName;
 495                         }
 496
 497                         //process them
 498                         listFileNames = cluster(myNames, labels);
 499
 500                         //send cutoff
 501                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 502
 503                         //send list filenames to root process
 504                         for (int j = 0; j < num; j++) {
 505                                 char tempListFileName[1024];
 506                                 strcpy(tempListFileName, listFileNames[j].c_str());
 507                                 int lengthList = listFileNames[j].length();
 508
 509                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 510                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 511                         }
 512
 513                         //send Labels to root process
 514                         int numLabels = labels.size();
 515                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 516
 517                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 518                                 char tempLabel[100];
 519                                 strcpy(tempLabel, (*it).c_str());
 520                                 int lengthLabel = (*it).length();
 521
 522                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 523                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 524                         }
 525                 }
 526
 527                 //make everyone wait
 528                 MPI_Barrier(MPI_COMM_WORLD);
 529
 530         #else
 531
 532                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 533                                 if(processors == 1){
 534                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 535                                 }else{
 536                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 537                                         dividedNames.resize(processors);
 538
 539                                         //for each file group figure out which process will complete it
 540                                         //want to divide the load intelligently so the big files are spread between processes
 541                                         int count = 1;
 542                                         for (int i = 0; i < distName.size(); i++) {
 543                                                 int processToAssign = (i+1) % processors;
 544                                                 if (processToAssign == 0) { processToAssign = processors; }
 545
 546                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 547                                         }
 548
 549                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 550                                         for (int i = 0; i < processors; i++) {
 551                                                 int remainder = ((i+1) % processors);
 552                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 553                                         }
 554
 555                                         createProcesses(dividedNames);
 556
 557                                         if (m->control_pressed) { return 0; }
 558
 559                                         //get list of list file names from each process
 560                                         for(int i=0;i<processors;i++){
 561                                                 string filename = toString(processIDS[i]) + ".temp";
 562                                                 ifstream in;
 563                                                 m->openInputFile(filename, in);
 564
 565                                                 in >> tag; m->gobble(in);
 566
 567                                                 while(!in.eof()) {
 568                                                         string tempName;
 569                                                         in >> tempName; m->gobble(in);
 570                                                         listFileNames.push_back(tempName);
 571                                                 }
 572                                                 in.close();
 573                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 574
 575                                                 //get labels
 576                                                 filename = toString(processIDS[i]) + ".temp.labels";
 577                                                 ifstream in2;
 578                                                 m->openInputFile(filename, in2);
 579
 580                                                 float tempCutoff;
 581                                                 in2 >> tempCutoff; m->gobble(in2);
 582                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 583
 584                                                 while(!in2.eof()) {
 585                                                         string tempName;
 586                                                         in2 >> tempName; m->gobble(in2);
 587                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 588                                                 }
 589                                                 in2.close();
 590                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 591                                         }
 592                                 }
 593                 #else
 594                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 595                 #endif
 596         #endif
 597                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 598
 599                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 600
 601                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 602
 603                 //****************** merge list file and create rabund and sabund files ******************************//
 604                 estart = time(NULL);
 605                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 606
 607                 #ifdef USE_MPI
 608                         if (pid == 0) { //only process 0 merges
 609                 #endif
 610
 611                 ListVector* listSingle;
 612                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 613
 614                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 615
 616                 mergeLists(listFileNames, labelBins, listSingle);
 617
 618                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 619
 620                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 621
 622                 m->mothurOutEndLine();
 623                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 624                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 625                 m->mothurOutEndLine();
 626
 627                 #ifdef USE_MPI
 628                         } //only process 0 merges
 629
 630                         //make everyone wait
 631                         MPI_Barrier(MPI_COMM_WORLD);
 632                 #endif
 633
 634                 return 0;
 635         }
 636         catch(exception& e) {
 637                 m->errorOut(e, "ClusterSplitCommand", "execute");
 638                 exit(1);
 639         }
 640 }
 641 //**********************************************************************************************************************
 642 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 643         try {
 644
 645                 map<float, int> labelBin;
 646                 vector<float> orderFloat;
 647                 int numSingleBins;
 648
 649                 //read in singletons
 650                 if (singleton != "none") {
 651                         ifstream in;
 652                         m->openInputFile(singleton, in);
 653
 654                         string firstCol, secondCol;
 655                         listSingle = new ListVector();
 656                         while (!in.eof()) {
 657                                 in >> firstCol >> secondCol; m->gobble(in);
 658                                 listSingle->push_back(secondCol);
 659                         }
 660                         in.close();
 661                         remove(singleton.c_str());
 662
 663                         numSingleBins = listSingle->getNumBins();
 664                 }else{  listSingle = NULL; numSingleBins = 0;  }
 665
 666                 //go through users set and make them floats so we can sort them
 667                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 668                         float temp = -10.0;
 669
 670                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 671                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 672
 673                         if (temp <= cutoff) {
 674                                 orderFloat.push_back(temp);
 675                                 labelBin[temp] = numSingleBins; //initialize numbins
 676                         }
 677                 }
 678
 679                 //sort order
 680                 sort(orderFloat.begin(), orderFloat.end());
 681                 userLabels.clear();
 682
 683                 //get the list info from each file
 684                 for (int k = 0; k < listNames.size(); k++) {
 685
 686                         if (m->control_pressed) {
 687                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 688                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 689                                 return labelBin;
 690                         }
 691
 692                         InputData* input = new InputData(listNames[k], "list");
 693                         ListVector* list = input->getListVector();
 694                         string lastLabel = list->getLabel();
 695
 696                         string filledInList = listNames[k] + "filledInTemp";
 697                         ofstream outFilled;
 698                         m->openOutputFile(filledInList, outFilled);
 699
 700                         //for each label needed
 701                         for(int l = 0; l < orderFloat.size(); l++){
 702
 703                                 string thisLabel;
 704                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 705                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 706
 707                                 //this file has reached the end
 708                                 if (list == NULL) {
 709                                         list = input->getListVector(lastLabel, true);
 710                                 }else{  //do you have the distance, or do you need to fill in
 711
 712                                         float labelFloat;
 713                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 714                                         else { convert(list->getLabel(), labelFloat); }
 715
 716                                         //check for missing labels
 717                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 718                                                 //if its bigger get last label, otherwise keep it
 719                                                 delete list;
 720                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 721                                         }
 722                                         lastLabel = list->getLabel();
 723                                 }
 724
 725                                 //print to new file
 726                                 list->setLabel(thisLabel);
 727                                 list->print(outFilled);
 728
 729                                 //update labelBin
 730                                 labelBin[orderFloat[l]] += list->getNumBins();
 731
 732                                 delete list;
 733
 734                                 list = input->getListVector();
 735                         }
 736
 737                         if (list != NULL) { delete list; }
 738                         delete input;
 739
 740                         outFilled.close();
 741                         remove(listNames[k].c_str());
 742                         rename(filledInList.c_str(), listNames[k].c_str());
 743                 }
 744
 745                 return labelBin;
 746         }
 747         catch(exception& e) {
 748                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 749                 exit(1);
 750         }
 751 }
 752 //**********************************************************************************************************************
 753 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 754         try {
 755                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 756                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 757
 758                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 759                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 760                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 761
 762                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 763                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 764                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 765
 766                 map<float, int>::iterator itLabel;
 767
 768                 //for each label needed
 769                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 770
 771                         string thisLabel;
 772                         if (itLabel->first == -1) { thisLabel = "unique"; }
 773                         else { thisLabel = toString(itLabel->first,  length-1);  }
 774
 775                         outList << thisLabel << '\t' << itLabel->second << '\t';
 776
 777                         RAbundVector* rabund = new RAbundVector();
 778                         rabund->setLabel(thisLabel);
 779
 780                         //add in singletons
 781                         if (listSingle != NULL) {
 782                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 783                                         outList << listSingle->get(j) << '\t';
 784                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 785                                 }
 786                         }
 787
 788                         //get the list info from each file
 789                         for (int k = 0; k < listNames.size(); k++) {
 790
 791                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 792
 793                                 InputData* input = new InputData(listNames[k], "list");
 794                                 ListVector* list = input->getListVector(thisLabel);
 795
 796                                 //this file has reached the end
 797                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 798                                 else {
 799                                         for (int j = 0; j < list->getNumBins(); j++) {
 800                                                 outList << list->get(j) << '\t';
 801                                                 rabund->push_back(m->getNumNames(list->get(j)));
 802                                         }
 803                                         delete list;
 804                                 }
 805                                 delete input;
 806                         }
 807
 808                         SAbundVector sabund = rabund->getSAbundVector();
 809
 810                         sabund.print(outSabund);
 811                         rabund->print(outRabund);
 812                         outList << endl;
 813
 814                         delete rabund;
 815                 }
 816
 817                 outList.close();
 818                 outRabund.close();
 819                 outSabund.close();
 820
 821                 if (listSingle != NULL) { delete listSingle;  }
 822
 823                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 824
 825                 return 0;
 826         }
 827         catch(exception& e) {
 828                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 829                 exit(1);
 830         }
 831 }
 832
 833 //**********************************************************************************************************************
 834
 835 void ClusterSplitCommand::printData(ListVector* oldList){
 836         try {
 837                 string label = oldList->getLabel();
 838                 RAbundVector oldRAbund = oldList->getRAbundVector();
 839
 840                 oldRAbund.setLabel(label);
 841                 if (m->isTrue(showabund)) {
 842                         oldRAbund.getSAbundVector().print(cout);
 843                 }
 844                 oldRAbund.print(outRabund);
 845                 oldRAbund.getSAbundVector().print(outSabund);
 846
 847                 oldList->print(outList);
 848         }
 849         catch(exception& e) {
 850                 m->errorOut(e, "ClusterSplitCommand", "printData");
 851                 exit(1);
 852         }
 853 }
 854 //**********************************************************************************************************************
 855 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 856         try {
 857
 858         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 859                 int process = 0;
 860                 int exitCommand = 1;
 861                 processIDS.clear();
 862
 863                 //loop through and create all the processes you want
 864                 while (process != processors) {
 865                         int pid = fork();
 866
 867                         if (pid > 0) {
 868                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 869                                 process++;
 870                         }else if (pid == 0){
 871                                 set<string> labels;
 872                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 873
 874                                 //write out names to file
 875                                 string filename = toString(getpid()) + ".temp";
 876                                 ofstream out;
 877                                 m->openOutputFile(filename, out);
 878                                 out << tag << endl;
 879                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 880                                 out.close();
 881
 882                                 //print out labels
 883                                 ofstream outLabels;
 884                                 filename = toString(getpid()) + ".temp.labels";
 885                                 m->openOutputFile(filename, outLabels);
 886
 887                                 outLabels << cutoff << endl;
 888                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 889                                         outLabels << (*it) << endl;
 890                                 }
 891                                 outLabels.close();
 892
 893                                 exit(0);
 894                         }else {
 895                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 896                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 897                                 exit(0);
 898                         }
 899                 }
 900
 901                 //force parent to wait until all the processes are done
 902                 for (int i=0;i<processors;i++) {
 903                         int temp = processIDS[i];
 904                         wait(&temp);
 905                 }
 906
 907                 return exitCommand;
 908         #endif
 909
 910         }
 911         catch(exception& e) {
 912                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 913                 exit(1);
 914         }
 915 }
 916 //**********************************************************************************************************************
 917
 918 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 919         try {
 920                 Cluster* cluster;
 921                 SparseMatrix* matrix;
 922                 ListVector* list;
 923                 ListVector oldList;
 924                 RAbundVector* rabund;
 925
 926                 vector<string> listFileNames;
 927
 928                 double smallestCutoff = cutoff;
 929
 930                 //cluster each distance file
 931                 for (int i = 0; i < distNames.size(); i++) {
 932                         if (m->control_pressed) { return listFileNames; }
 933
 934                         string thisNamefile = distNames[i].begin()->second;
 935                         string thisDistFile = distNames[i].begin()->first;
 936
 937                         //read in distance file
 938                         globaldata->setNameFile(thisNamefile);
 939                         globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
 940
 941                         #ifdef USE_MPI
 942                                 int pid;
 943                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 944
 945                                 //output your files too
 946                                 if (pid != 0) {
 947                                         cout << endl << "Reading " << thisDistFile << endl;
 948                                 }
 949                         #endif
 950
 951                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 952
 953                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 954                         read->setCutoff(cutoff);
 955
 956                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 957                         nameMap->readMap();
 958                         read->read(nameMap);
 959
 960                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
 961
 962                         list = read->getListVector();
 963                         oldList = *list;
 964                         matrix = read->getMatrix();
 965
 966                         delete read;
 967                         delete nameMap;
 968
 969
 970                         #ifdef USE_MPI
 971                                 //output your files too
 972                                 if (pid != 0) {
 973                                         cout << endl << "Clustering " << thisDistFile << endl;
 974                                 }
 975                         #endif
 976
 977                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
 978
 979                         rabund = new RAbundVector(list->getRAbundVector());
 980
 981                         //create cluster
 982                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
 983                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
 984                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
 985                         tag = cluster->getTag();
 986
 987                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
 988                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
 989
 990                         ofstream listFile;
 991                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
 992
 993                         listFileNames.push_back(fileroot+ tag + ".list");
 994
 995                         time_t estart = time(NULL);
 996
 997                         float previousDist = 0.00000;
 998                         float rndPreviousDist = 0.00000;
 999
1000                         oldList = *list;
1001
1002                         print_start = true;
1003                         start = time(NULL);
1004                         double saveCutoff = cutoff;
1005
1006                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1007
1008                                 if (m->control_pressed) { //clean up
1009                                         delete matrix; delete list;     delete cluster; delete rabund;
1010                                         listFile.close();
1011                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1012                                         listFileNames.clear(); return listFileNames;
1013                                 }
1014
1015                                 cluster->update(saveCutoff);
1016
1017                                 float dist = matrix->getSmallDist();
1018                                 float rndDist;
1019                                 if (hard) {
1020                                         rndDist = m->ceilDist(dist, precision);
1021                                 }else{
1022                                         rndDist = m->roundDist(dist, precision);
1023                                 }
1024
1025                                 if(previousDist <= 0.0000 && dist != previousDist){
1026                                         oldList.setLabel("unique");
1027                                         oldList.print(listFile);
1028                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1029                                 }
1030                                 else if(rndDist != rndPreviousDist){
1031                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1032                                         oldList.print(listFile);
1033                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1034                                 }
1035
1036                                 previousDist = dist;
1037                                 rndPreviousDist = rndDist;
1038                                 oldList = *list;
1039                         }
1040
1041
1042                         if(previousDist <= 0.0000){
1043                                 oldList.setLabel("unique");
1044                                 oldList.print(listFile);
1045                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1046                         }
1047                         else if(rndPreviousDist<cutoff){
1048                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1049                                 oldList.print(listFile);
1050                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1051                         }
1052
1053                         delete matrix; delete list;     delete cluster; delete rabund;
1054                         listFile.close();
1055
1056                         if (m->control_pressed) { //clean up
1057                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1058                                 listFileNames.clear(); return listFileNames;
1059                         }
1060
1061                         remove(thisDistFile.c_str());
1062                         remove(thisNamefile.c_str());
1063
1064                         if (saveCutoff != cutoff) {
1065                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1066                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1067
1068                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1069                         }
1070
1071                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1072                 }
1073
1074                 cutoff = smallestCutoff;
1075
1076                 return listFileNames;
1077
1078         }
1079         catch(exception& e) {
1080                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1081                 exit(1);
1082         }
1083
1084
1085 }
1086 //**********************************************************************************************************************
1087
1088 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1089         try{
1090
1091 #ifdef USE_MPI
1092                 int pid;
1093                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1094
1095                 if (pid != 0) {
1096 #endif
1097
1098                 string thisOutputDir = outputDir;
1099                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1100                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1101                 remove(outputFileName.c_str());
1102
1103
1104                 for (int i = 0; i < distNames.size(); i++) {
1105                         if (m->control_pressed) {  return 0; }
1106
1107                         string thisDistFile = distNames[i].begin()->first;
1108
1109                         m->appendFiles(thisDistFile, outputFileName);
1110                 }
1111
1112                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1113
1114 #ifdef USE_MPI
1115                 }
1116 #endif
1117
1118
1119
1120
1121         }
1122         catch(exception& e) {
1123                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1124                 exit(1);
1125         }
1126 }
1127 //**********************************************************************************************************************