clustersplitcommand.cpp

   1 /*
   2  *  clustersplitcommand.cpp
   3  *  Mothur
   4  *
   5  *  Created by westcott on 5/19/10.
   6  *  Copyright 2010 Schloss Lab. All rights reserved.
   7  *
   8  */
   9
  10 #include "clustersplitcommand.h"
  11 #include "readcluster.h"
  12 #include "splitmatrix.h"
  13 #include "readphylip.h"
  14 #include "readcolumn.h"
  15 #include "readmatrix.hpp"
  16 #include "inputdata.h"
  17
  18
  19 //**********************************************************************************************************************
  20 vector<string> ClusterSplitCommand::getValidParameters(){
  21         try {
  22                 string AlignArray[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  23                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
  24                 return myArray;
  25         }
  26         catch(exception& e) {
  27                 m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
  28                 exit(1);
  29         }
  30 }
  31 //**********************************************************************************************************************
  32 ClusterSplitCommand::ClusterSplitCommand(){
  33         try {
  34                 abort = true; calledHelp = true;
  35                 vector<string> tempOutNames;
  36                 outputTypes["list"] = tempOutNames;
  37                 outputTypes["rabund"] = tempOutNames;
  38                 outputTypes["sabund"] = tempOutNames;
  39                 outputTypes["column"] = tempOutNames;
  40         }
  41         catch(exception& e) {
  42                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
  43                 exit(1);
  44         }
  45 }
  46 //**********************************************************************************************************************
  47 vector<string> ClusterSplitCommand::getRequiredParameters(){
  48         try {
  49                 string Array[] =  {"fasta","phylip","column","or"};
  50                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  51                 return myArray;
  52         }
  53         catch(exception& e) {
  54                 m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
  55                 exit(1);
  56         }
  57 }
  58 //**********************************************************************************************************************
  59 vector<string> ClusterSplitCommand::getRequiredFiles(){
  60         try {
  61                 vector<string> myArray;
  62                 return myArray;
  63         }
  64         catch(exception& e) {
  65                 m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
  66                 exit(1);
  67         }
  68 }
  69 //**********************************************************************************************************************
  70 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
  71 ClusterSplitCommand::ClusterSplitCommand(string option)  {
  72         try{
  73                 globaldata = GlobalData::getInstance();
  74                 abort = false; calledHelp = false;
  75                 format = "";
  76
  77                 //allow user to run help
  78                 if(option == "help") { help(); abort = true; calledHelp = true; }
  79
  80                 else {
  81                         //valid paramters for this command
  82                         string Array[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
  83                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
  84
  85                         OptionParser parser(option);
  86                         map<string,string> parameters = parser.getParameters();
  87
  88                         ValidParameters validParameter("cluster.split");
  89
  90                         //check to make sure all parameters are valid for command
  91                         map<string,string>::iterator it;
  92                         for (it = parameters.begin(); it != parameters.end(); it++) {
  93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
  94                                         abort = true;
  95                                 }
  96                         }
  97
  98                         //initialize outputTypes
  99                         vector<string> tempOutNames;
 100                         outputTypes["list"] = tempOutNames;
 101                         outputTypes["rabund"] = tempOutNames;
 102                         outputTypes["sabund"] = tempOutNames;
 103                         outputTypes["column"] = tempOutNames;
 104
 105                         globaldata->newRead();
 106
 107                         //if the user changes the output directory command factory will send this info to us in the output parameter
 108                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 109
 110                                 //if the user changes the input directory command factory will send this info to us in the output parameter
 111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
 112                         if (inputDir == "not found"){   inputDir = "";          }
 113                         else {
 114                                 string path;
 115                                 it = parameters.find("phylip");
 116                                 //user has given a template file
 117                                 if(it != parameters.end()){
 118                                         path = m->hasPath(it->second);
 119                                         //if the user has not given a path then, add inputdir. else leave path alone.
 120                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
 121                                 }
 122
 123                                 it = parameters.find("column");
 124                                 //user has given a template file
 125                                 if(it != parameters.end()){
 126                                         path = m->hasPath(it->second);
 127                                         //if the user has not given a path then, add inputdir. else leave path alone.
 128                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
 129                                 }
 130
 131                                 it = parameters.find("name");
 132                                 //user has given a template file
 133                                 if(it != parameters.end()){
 134                                         path = m->hasPath(it->second);
 135                                         //if the user has not given a path then, add inputdir. else leave path alone.
 136                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
 137                                 }
 138
 139                                 it = parameters.find("taxonomy");
 140                                 //user has given a template file
 141                                 if(it != parameters.end()){
 142                                         path = m->hasPath(it->second);
 143                                         //if the user has not given a path then, add inputdir. else leave path alone.
 144                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
 145                                 }
 146
 147                                 it = parameters.find("fasta");
 148                                 //user has given a template file
 149                                 if(it != parameters.end()){
 150                                         path = m->hasPath(it->second);
 151                                         //if the user has not given a path then, add inputdir. else leave path alone.
 152                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
 153                                 }
 154                         }
 155
 156                         //check for required parameters
 157                         phylipfile = validParameter.validFile(parameters, "phylip", true);
 158                         if (phylipfile == "not open") { abort = true; }
 159                         else if (phylipfile == "not found") { phylipfile = ""; }
 160                         else {  distfile = phylipfile;  format = "phylip";      }
 161
 162                         columnfile = validParameter.validFile(parameters, "column", true);
 163                         if (columnfile == "not open") { abort = true; }
 164                         else if (columnfile == "not found") { columnfile = ""; }
 165                         else {  distfile = columnfile; format = "column";       }
 166
 167                         namefile = validParameter.validFile(parameters, "name", true);
 168                         if (namefile == "not open") { abort = true; }
 169                         else if (namefile == "not found") { namefile = ""; }
 170
 171                         fastafile = validParameter.validFile(parameters, "fasta", true);
 172                         if (fastafile == "not open") { abort = true; }
 173                         else if (fastafile == "not found") { fastafile = ""; }
 174                         else { distfile = fastafile;  splitmethod = "fasta";  }
 175
 176                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
 177                         if (taxFile == "not open") { abort = true; }
 178                         else if (taxFile == "not found") { taxFile = ""; }
 179
 180                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); abort = true; }
 181                         else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
 182
 183                         if (columnfile != "") {
 184                                 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
 185                         }
 186
 187                         if (fastafile != "") {
 188                                 if (taxFile == "") { m->mothurOut("You need to provide a taxonomy file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 189                                 if (namefile == "") { m->mothurOut("You need to provide a names file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
 190                         }
 191
 192                         //check for optional parameter and set defaults
 193                         // ...at some point should added some additional type checking...
 194                         //get user cutoff and precision or use defaults
 195                         string temp;
 196                         temp = validParameter.validFile(parameters, "precision", false);
 197                         if (temp == "not found") { temp = "100"; }
 198                         //saves precision legnth for formatting below
 199                         length = temp.length();
 200                         convert(temp, precision);
 201
 202                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
 203                         hard = m->isTrue(temp);
 204
 205                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
 206                         large = m->isTrue(temp);
 207
 208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
 209                         convert(temp, processors);
 210
 211                         temp = validParameter.validFile(parameters, "splitmethod", false);
 212                         if (splitmethod != "fasta") {
 213                                 if (temp == "not found")  { splitmethod = "distance"; }
 214                                 else {  splitmethod = temp; }
 215                         }
 216
 217                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
 218                         convert(temp, cutoff);
 219                         cutoff += (5 / (precision * 10.0));
 220
 221                         temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
 222                         convert(temp, taxLevelCutoff);
 223
 224                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
 225
 226                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
 227                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 228
 229                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
 230                         else { m->mothurOut(splitmethod + " is not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
 231
 232                         if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
 233
 234                         showabund = validParameter.validFile(parameters, "showabund", false);
 235                         if (showabund == "not found") { showabund = "T"; }
 236
 237                         timing = validParameter.validFile(parameters, "timing", false);
 238                         if (timing == "not found") { timing = "F"; }
 239
 240                 }
 241         }
 242         catch(exception& e) {
 243                 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
 244                 exit(1);
 245         }
 246 }
 247
 248 //**********************************************************************************************************************
 249
 250 void ClusterSplitCommand::help(){
 251         try {
 252                 m->mothurOut("The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n");
 253                 m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n");
 254                 m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n");
 255                 m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify.  \n");
 256                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n");
 257                 m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file.  \n");
 258                 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n");
 259                 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
 260                 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file. \n");
 261                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
 262                 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
 263                 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
 264                 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
 265                 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
 266                 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
 267                 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
 268                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
 269                 #ifdef USE_MPI
 270                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
 271                 #endif
 272                 m->mothurOut("The cluster.split command should be in the following format: \n");
 273                 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
 274                 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");
 275
 276         }
 277         catch(exception& e) {
 278                 m->errorOut(e, "ClusterSplitCommand", "help");
 279                 exit(1);
 280         }
 281 }
 282
 283 //**********************************************************************************************************************
 284
 285 ClusterSplitCommand::~ClusterSplitCommand(){}
 286
 287 //**********************************************************************************************************************
 288
 289 int ClusterSplitCommand::execute(){
 290         try {
 291
 292                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 293
 294                 time_t estart;
 295                 vector<string> listFileNames;
 296                 set<string> labels;
 297                 string singletonName = "";
 298                 double saveCutoff = cutoff;
 299
 300                 //****************** file prep work ******************************//
 301                 #ifdef USE_MPI
 302                         int pid;
 303                         int tag = 2001;
 304                         MPI_Status status;
 305                         MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
 306                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 307
 308                         if (pid == 0) { //only process 0 converts and splits
 309
 310                 #endif
 311
 312                 //if user gave a phylip file convert to column file
 313                 if (format == "phylip") {
 314                         estart = time(NULL);
 315                         m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
 316
 317                         ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
 318
 319                         NameAssignment* nameMap = NULL;
 320                         convert->setFormat("phylip");
 321                         convert->read(nameMap);
 322
 323                         if (m->control_pressed) {  delete convert;  return 0;  }
 324
 325                         distfile = convert->getOutputFile();
 326
 327                         //if no names file given with phylip file, create it
 328                         ListVector* listToMakeNameFile =  convert->getListVector();
 329                         if (namefile == "") {  //you need to make a namefile for split matrix
 330                                 ofstream out;
 331                                 namefile = phylipfile + ".names";
 332                                 m->openOutputFile(namefile, out);
 333                                 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
 334                                         string bin = listToMakeNameFile->get(i);
 335                                         out << bin << '\t' << bin << endl;
 336                                 }
 337                                 out.close();
 338                         }
 339                         delete listToMakeNameFile;
 340                         delete convert;
 341
 342                         m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
 343                 }
 344                 if (m->control_pressed) { return 0; }
 345
 346                 estart = time(NULL);
 347                 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
 348
 349                 //split matrix into non-overlapping groups
 350                 SplitMatrix* split;
 351                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
 352                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
 353                 else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
 354                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
 355
 356                 split->split();
 357
 358                 if (m->control_pressed) { delete split; return 0; }
 359
 360                 singletonName = split->getSingletonNames();
 361                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
 362                 delete split;
 363
 364                 //output a merged distance file
 365                 if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
 366
 367
 368                 if (m->control_pressed) { return 0; }
 369
 370                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
 371                 estart = time(NULL);
 372
 373                 //****************** break up files between processes and cluster each file set ******************************//
 374         #ifdef USE_MPI
 375                         ////you are process 0 from above////
 376
 377                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 378                         dividedNames.resize(processors);
 379
 380                         //for each file group figure out which process will complete it
 381                         //want to divide the load intelligently so the big files are spread between processes
 382                         for (int i = 0; i < distName.size(); i++) {
 383                                 int processToAssign = (i+1) % processors;
 384                                 if (processToAssign == 0) { processToAssign = processors; }
 385
 386                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 387                         }
 388
 389                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 390                         for (int i = 0; i < processors; i++) {
 391                                 int remainder = ((i+1) % processors);
 392                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 393                         }
 394
 395
 396                         //send each child the list of files it needs to process
 397                         for(int i = 1; i < processors; i++) {
 398                                 //send number of file pairs
 399                                 int num = dividedNames[i].size();
 400                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 401
 402                                 for (int j = 0; j < num; j++) { //send filenames to process i
 403                                         char tempDistFileName[1024];
 404                                         strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
 405                                         int lengthDist = (dividedNames[i][j].begin()->first).length();
 406
 407                                         MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 408                                         MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 409
 410                                         char tempNameFileName[1024];
 411                                         strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
 412                                         int lengthName = (dividedNames[i][j].begin()->second).length();
 413
 414                                         MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
 415                                         MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
 416                                 }
 417                         }
 418
 419                         //process your share
 420                         listFileNames = cluster(dividedNames[0], labels);
 421
 422                         //receive the other processes info
 423                         for(int i = 1; i < processors; i++) {
 424                                 int num = dividedNames[i].size();
 425
 426                                 double tempCutoff;
 427                                 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
 428                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 429
 430                                 //send list filenames to root process
 431                                 for (int j = 0; j < num; j++) {
 432                                         int lengthList = 0;
 433                                         char tempListFileName[1024];
 434
 435                                         MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 436                                         MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 437
 438                                         string myListFileName = tempListFileName;
 439                                         myListFileName = myListFileName.substr(0, lengthList);
 440
 441                                         listFileNames.push_back(myListFileName);
 442                                 }
 443
 444                                 //send Labels to root process
 445                                 int numLabels = 0;
 446                                 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 447
 448                                 for (int j = 0; j < numLabels; j++) {
 449                                         int lengthLabel = 0;
 450                                         char tempLabel[100];
 451
 452                                         MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
 453                                         MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
 454
 455                                         string myLabel = tempLabel;
 456                                         myLabel = myLabel.substr(0, lengthLabel);
 457
 458                                         if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
 459                                 }
 460                         }
 461
 462                 }else { //you are a child process
 463                         vector < map<string, string> >  myNames;
 464
 465                         //recieve the files you need to process
 466                         //receive number of file pairs
 467                         int num = 0;
 468                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 469
 470                         myNames.resize(num);
 471
 472                         for (int j = 0; j < num; j++) { //receive filenames to process
 473                                 int lengthDist = 0;
 474                                 char tempDistFileName[1024];
 475
 476                                 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 477                                 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 478
 479                                 string myDistFileName = tempDistFileName;
 480                                 myDistFileName = myDistFileName.substr(0, lengthDist);
 481
 482                                 int lengthName = 0;
 483                                 char tempNameFileName[1024];
 484
 485                                 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
 486                                 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
 487
 488                                 string myNameFileName = tempNameFileName;
 489                                 myNameFileName = myNameFileName.substr(0, lengthName);
 490
 491                                 //save file name
 492                                 myNames[j][myDistFileName] = myNameFileName;
 493                         }
 494
 495                         //process them
 496                         listFileNames = cluster(myNames, labels);
 497
 498                         //send cutoff
 499                         MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
 500
 501                         //send list filenames to root process
 502                         for (int j = 0; j < num; j++) {
 503                                 char tempListFileName[1024];
 504                                 strcpy(tempListFileName, listFileNames[j].c_str());
 505                                 int lengthList = listFileNames[j].length();
 506
 507                                 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 508                                 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 509                         }
 510
 511                         //send Labels to root process
 512                         int numLabels = labels.size();
 513                         MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 514
 515                         for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
 516                                 char tempLabel[100];
 517                                 strcpy(tempLabel, (*it).c_str());
 518                                 int lengthLabel = (*it).length();
 519
 520                                 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
 521                                 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
 522                         }
 523                 }
 524
 525                 //make everyone wait
 526                 MPI_Barrier(MPI_COMM_WORLD);
 527
 528         #else
 529
 530                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 531                                 if(processors == 1){
 532                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 533                                 }else{
 534                                         vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
 535                                         dividedNames.resize(processors);
 536
 537                                         //for each file group figure out which process will complete it
 538                                         //want to divide the load intelligently so the big files are spread between processes
 539                                         for (int i = 0; i < distName.size(); i++) {
 540                                                 int processToAssign = (i+1) % processors;
 541                                                 if (processToAssign == 0) { processToAssign = processors; }
 542
 543                                                 dividedNames[(processToAssign-1)].push_back(distName[i]);
 544                                         }
 545
 546                                         //not lets reverse the order of ever other process, so we balance big files running with little ones
 547                                         for (int i = 0; i < processors; i++) {
 548                                                 int remainder = ((i+1) % processors);
 549                                                 if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
 550                                         }
 551
 552                                         createProcesses(dividedNames);
 553
 554                                         if (m->control_pressed) { return 0; }
 555
 556                                         //get list of list file names from each process
 557                                         for(int i=0;i<processors;i++){
 558                                                 string filename = toString(processIDS[i]) + ".temp";
 559                                                 ifstream in;
 560                                                 m->openInputFile(filename, in);
 561
 562                                                 in >> tag; m->gobble(in);
 563
 564                                                 while(!in.eof()) {
 565                                                         string tempName;
 566                                                         in >> tempName; m->gobble(in);
 567                                                         listFileNames.push_back(tempName);
 568                                                 }
 569                                                 in.close();
 570                                                 remove((toString(processIDS[i]) + ".temp").c_str());
 571
 572                                                 //get labels
 573                                                 filename = toString(processIDS[i]) + ".temp.labels";
 574                                                 ifstream in2;
 575                                                 m->openInputFile(filename, in2);
 576
 577                                                 float tempCutoff;
 578                                                 in2 >> tempCutoff; m->gobble(in2);
 579                                                 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
 580
 581                                                 while(!in2.eof()) {
 582                                                         string tempName;
 583                                                         in2 >> tempName; m->gobble(in2);
 584                                                         if (labels.count(tempName) == 0) { labels.insert(tempName); }
 585                                                 }
 586                                                 in2.close();
 587                                                 remove((toString(processIDS[i]) + ".temp.labels").c_str());
 588                                         }
 589                                 }
 590                 #else
 591                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
 592                 #endif
 593         #endif
 594                 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
 595
 596                 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
 597
 598                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
 599
 600                 //****************** merge list file and create rabund and sabund files ******************************//
 601                 estart = time(NULL);
 602                 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
 603
 604                 #ifdef USE_MPI
 605                         if (pid == 0) { //only process 0 merges
 606                 #endif
 607
 608                 ListVector* listSingle;
 609                 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
 610
 611                 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 612
 613                 mergeLists(listFileNames, labelBins, listSingle);
 614
 615                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 616
 617                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
 618
 619                 //set list file as new current listfile
 620                 string current = "";
 621                 itTypes = outputTypes.find("list");
 622                 if (itTypes != outputTypes.end()) {
 623                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
 624                 }
 625
 626                 //set rabund file as new current rabundfile
 627                 itTypes = outputTypes.find("rabund");
 628                 if (itTypes != outputTypes.end()) {
 629                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
 630                 }
 631
 632                 //set sabund file as new current sabundfile
 633                 itTypes = outputTypes.find("sabund");
 634                 if (itTypes != outputTypes.end()) {
 635                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
 636                 }
 637
 638                 m->mothurOutEndLine();
 639                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 640                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
 641                 m->mothurOutEndLine();
 642
 643                 #ifdef USE_MPI
 644                         } //only process 0 merges
 645
 646                         //make everyone wait
 647                         MPI_Barrier(MPI_COMM_WORLD);
 648                 #endif
 649
 650                 return 0;
 651         }
 652         catch(exception& e) {
 653                 m->errorOut(e, "ClusterSplitCommand", "execute");
 654                 exit(1);
 655         }
 656 }
 657 //**********************************************************************************************************************
 658 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
 659         try {
 660
 661                 map<float, int> labelBin;
 662                 vector<float> orderFloat;
 663                 int numSingleBins;
 664
 665                 //read in singletons
 666                 if (singleton != "none") {
 667                         ifstream in;
 668                         m->openInputFile(singleton, in);
 669
 670                         string firstCol, secondCol;
 671                         listSingle = new ListVector();
 672                         while (!in.eof()) {
 673                                 in >> firstCol >> secondCol; m->gobble(in);
 674                                 listSingle->push_back(secondCol);
 675                         }
 676                         in.close();
 677                         remove(singleton.c_str());
 678
 679                         numSingleBins = listSingle->getNumBins();
 680                 }else{  listSingle = NULL; numSingleBins = 0;  }
 681
 682                 //go through users set and make them floats so we can sort them
 683                 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
 684                         float temp = -10.0;
 685
 686                         if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
 687                         else if (*it == "unique")                                                                               {       temp = -1.0;            }
 688
 689                         if (temp <= cutoff) {
 690                                 orderFloat.push_back(temp);
 691                                 labelBin[temp] = numSingleBins; //initialize numbins
 692                         }
 693                 }
 694
 695                 //sort order
 696                 sort(orderFloat.begin(), orderFloat.end());
 697                 userLabels.clear();
 698
 699                 //get the list info from each file
 700                 for (int k = 0; k < listNames.size(); k++) {
 701
 702                         if (m->control_pressed) {
 703                                 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
 704                                 for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
 705                                 return labelBin;
 706                         }
 707
 708                         InputData* input = new InputData(listNames[k], "list");
 709                         ListVector* list = input->getListVector();
 710                         string lastLabel = list->getLabel();
 711
 712                         string filledInList = listNames[k] + "filledInTemp";
 713                         ofstream outFilled;
 714                         m->openOutputFile(filledInList, outFilled);
 715
 716                         //for each label needed
 717                         for(int l = 0; l < orderFloat.size(); l++){
 718
 719                                 string thisLabel;
 720                                 if (orderFloat[l] == -1) { thisLabel = "unique"; }
 721                                 else { thisLabel = toString(orderFloat[l],  length-1);  }
 722
 723                                 //this file has reached the end
 724                                 if (list == NULL) {
 725                                         list = input->getListVector(lastLabel, true);
 726                                 }else{  //do you have the distance, or do you need to fill in
 727
 728                                         float labelFloat;
 729                                         if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
 730                                         else { convert(list->getLabel(), labelFloat); }
 731
 732                                         //check for missing labels
 733                                         if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
 734                                                 //if its bigger get last label, otherwise keep it
 735                                                 delete list;
 736                                                 list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
 737                                         }
 738                                         lastLabel = list->getLabel();
 739                                 }
 740
 741                                 //print to new file
 742                                 list->setLabel(thisLabel);
 743                                 list->print(outFilled);
 744
 745                                 //update labelBin
 746                                 labelBin[orderFloat[l]] += list->getNumBins();
 747
 748                                 delete list;
 749
 750                                 list = input->getListVector();
 751                         }
 752
 753                         if (list != NULL) { delete list; }
 754                         delete input;
 755
 756                         outFilled.close();
 757                         remove(listNames[k].c_str());
 758                         rename(filledInList.c_str(), listNames[k].c_str());
 759                 }
 760
 761                 return labelBin;
 762         }
 763         catch(exception& e) {
 764                 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
 765                 exit(1);
 766         }
 767 }
 768 //**********************************************************************************************************************
 769 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
 770         try {
 771                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
 772                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
 773
 774                 m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
 775                 m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
 776                 m->openOutputFile(fileroot+ tag + ".list",              outList);
 777
 778                 outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
 779                 outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
 780                 outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
 781
 782                 map<float, int>::iterator itLabel;
 783
 784                 //for each label needed
 785                 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
 786
 787                         string thisLabel;
 788                         if (itLabel->first == -1) { thisLabel = "unique"; }
 789                         else { thisLabel = toString(itLabel->first,  length-1);  }
 790
 791                         outList << thisLabel << '\t' << itLabel->second << '\t';
 792
 793                         RAbundVector* rabund = new RAbundVector();
 794                         rabund->setLabel(thisLabel);
 795
 796                         //add in singletons
 797                         if (listSingle != NULL) {
 798                                 for (int j = 0; j < listSingle->getNumBins(); j++) {
 799                                         outList << listSingle->get(j) << '\t';
 800                                         rabund->push_back(m->getNumNames(listSingle->get(j)));
 801                                 }
 802                         }
 803
 804                         //get the list info from each file
 805                         for (int k = 0; k < listNames.size(); k++) {
 806
 807                                 if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
 808
 809                                 InputData* input = new InputData(listNames[k], "list");
 810                                 ListVector* list = input->getListVector(thisLabel);
 811
 812                                 //this file has reached the end
 813                                 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }
 814                                 else {
 815                                         for (int j = 0; j < list->getNumBins(); j++) {
 816                                                 outList << list->get(j) << '\t';
 817                                                 rabund->push_back(m->getNumNames(list->get(j)));
 818                                         }
 819                                         delete list;
 820                                 }
 821                                 delete input;
 822                         }
 823
 824                         SAbundVector sabund = rabund->getSAbundVector();
 825
 826                         sabund.print(outSabund);
 827                         rabund->print(outRabund);
 828                         outList << endl;
 829
 830                         delete rabund;
 831                 }
 832
 833                 outList.close();
 834                 outRabund.close();
 835                 outSabund.close();
 836
 837                 if (listSingle != NULL) { delete listSingle;  }
 838
 839                 for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
 840
 841                 return 0;
 842         }
 843         catch(exception& e) {
 844                 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
 845                 exit(1);
 846         }
 847 }
 848
 849 //**********************************************************************************************************************
 850
 851 void ClusterSplitCommand::printData(ListVector* oldList){
 852         try {
 853                 string label = oldList->getLabel();
 854                 RAbundVector oldRAbund = oldList->getRAbundVector();
 855
 856                 oldRAbund.setLabel(label);
 857                 if (m->isTrue(showabund)) {
 858                         oldRAbund.getSAbundVector().print(cout);
 859                 }
 860                 oldRAbund.print(outRabund);
 861                 oldRAbund.getSAbundVector().print(outSabund);
 862
 863                 oldList->print(outList);
 864         }
 865         catch(exception& e) {
 866                 m->errorOut(e, "ClusterSplitCommand", "printData");
 867                 exit(1);
 868         }
 869 }
 870 //**********************************************************************************************************************
 871 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
 872         try {
 873
 874         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 875                 int process = 0;
 876                 int exitCommand = 1;
 877                 processIDS.clear();
 878
 879                 //loop through and create all the processes you want
 880                 while (process != processors) {
 881                         int pid = fork();
 882
 883                         if (pid > 0) {
 884                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
 885                                 process++;
 886                         }else if (pid == 0){
 887                                 set<string> labels;
 888                                 vector<string> listFileNames = cluster(dividedNames[process], labels);
 889
 890                                 //write out names to file
 891                                 string filename = toString(getpid()) + ".temp";
 892                                 ofstream out;
 893                                 m->openOutputFile(filename, out);
 894                                 out << tag << endl;
 895                                 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
 896                                 out.close();
 897
 898                                 //print out labels
 899                                 ofstream outLabels;
 900                                 filename = toString(getpid()) + ".temp.labels";
 901                                 m->openOutputFile(filename, outLabels);
 902
 903                                 outLabels << cutoff << endl;
 904                                 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
 905                                         outLabels << (*it) << endl;
 906                                 }
 907                                 outLabels.close();
 908
 909                                 exit(0);
 910                         }else {
 911                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
 912                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
 913                                 exit(0);
 914                         }
 915                 }
 916
 917                 //force parent to wait until all the processes are done
 918                 for (int i=0;i<processors;i++) {
 919                         int temp = processIDS[i];
 920                         wait(&temp);
 921                 }
 922
 923                 return exitCommand;
 924         #endif
 925
 926         }
 927         catch(exception& e) {
 928                 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
 929                 exit(1);
 930         }
 931 }
 932 //**********************************************************************************************************************
 933
 934 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
 935         try {
 936                 Cluster* cluster;
 937                 SparseMatrix* matrix;
 938                 ListVector* list;
 939                 ListVector oldList;
 940                 RAbundVector* rabund;
 941
 942                 vector<string> listFileNames;
 943
 944                 double smallestCutoff = cutoff;
 945
 946                 //cluster each distance file
 947                 for (int i = 0; i < distNames.size(); i++) {
 948                         if (m->control_pressed) { return listFileNames; }
 949
 950                         string thisNamefile = distNames[i].begin()->second;
 951                         string thisDistFile = distNames[i].begin()->first;
 952
 953                         //read in distance file
 954                         globaldata->setNameFile(thisNamefile);
 955                         globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
 956
 957                         #ifdef USE_MPI
 958                                 int pid;
 959                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
 960
 961                                 //output your files too
 962                                 if (pid != 0) {
 963                                         cout << endl << "Reading " << thisDistFile << endl;
 964                                 }
 965                         #endif
 966
 967                         m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
 968
 969                         ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
 970                         read->setCutoff(cutoff);
 971
 972                         NameAssignment* nameMap = new NameAssignment(thisNamefile);
 973                         nameMap->readMap();
 974                         read->read(nameMap);
 975
 976                         if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
 977
 978                         list = read->getListVector();
 979                         oldList = *list;
 980                         matrix = read->getMatrix();
 981
 982                         delete read;
 983                         delete nameMap;
 984
 985
 986                         #ifdef USE_MPI
 987                                 //output your files too
 988                                 if (pid != 0) {
 989                                         cout << endl << "Clustering " << thisDistFile << endl;
 990                                 }
 991                         #endif
 992
 993                         m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
 994
 995                         rabund = new RAbundVector(list->getRAbundVector());
 996
 997                         //create cluster
 998                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
 999                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1000                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
1001                         tag = cluster->getTag();
1002
1003                         if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1004                         fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1005
1006                         ofstream listFile;
1007                         m->openOutputFile(fileroot+ tag + ".list",      listFile);
1008
1009                         listFileNames.push_back(fileroot+ tag + ".list");
1010
1011                         float previousDist = 0.00000;
1012                         float rndPreviousDist = 0.00000;
1013
1014                         oldList = *list;
1015
1016                         print_start = true;
1017                         start = time(NULL);
1018                         double saveCutoff = cutoff;
1019
1020                         while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1021
1022                                 if (m->control_pressed) { //clean up
1023                                         delete matrix; delete list;     delete cluster; delete rabund;
1024                                         listFile.close();
1025                                         for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1026                                         listFileNames.clear(); return listFileNames;
1027                                 }
1028
1029                                 cluster->update(saveCutoff);
1030
1031                                 float dist = matrix->getSmallDist();
1032                                 float rndDist;
1033                                 if (hard) {
1034                                         rndDist = m->ceilDist(dist, precision);
1035                                 }else{
1036                                         rndDist = m->roundDist(dist, precision);
1037                                 }
1038
1039                                 if(previousDist <= 0.0000 && dist != previousDist){
1040                                         oldList.setLabel("unique");
1041                                         oldList.print(listFile);
1042                                         if (labels.count("unique") == 0) {  labels.insert("unique");  }
1043                                 }
1044                                 else if(rndDist != rndPreviousDist){
1045                                         oldList.setLabel(toString(rndPreviousDist,  length-1));
1046                                         oldList.print(listFile);
1047                                         if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1048                                 }
1049
1050                                 previousDist = dist;
1051                                 rndPreviousDist = rndDist;
1052                                 oldList = *list;
1053                         }
1054
1055
1056                         if(previousDist <= 0.0000){
1057                                 oldList.setLabel("unique");
1058                                 oldList.print(listFile);
1059                                 if (labels.count("unique") == 0) { labels.insert("unique"); }
1060                         }
1061                         else if(rndPreviousDist<cutoff){
1062                                 oldList.setLabel(toString(rndPreviousDist,  length-1));
1063                                 oldList.print(listFile);
1064                                 if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
1065                         }
1066
1067                         delete matrix; delete list;     delete cluster; delete rabund;
1068                         listFile.close();
1069
1070                         if (m->control_pressed) { //clean up
1071                                 for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
1072                                 listFileNames.clear(); return listFileNames;
1073                         }
1074
1075                         remove(thisDistFile.c_str());
1076                         remove(thisNamefile.c_str());
1077
1078                         if (saveCutoff != cutoff) {
1079                                 if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
1080                                 else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
1081
1082                                 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
1083                         }
1084
1085                         if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
1086                 }
1087
1088                 cutoff = smallestCutoff;
1089
1090                 return listFileNames;
1091
1092         }
1093         catch(exception& e) {
1094                 m->errorOut(e, "ClusterSplitCommand", "cluster");
1095                 exit(1);
1096         }
1097
1098
1099 }
1100 //**********************************************************************************************************************
1101
1102 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1103         try{
1104
1105 #ifdef USE_MPI
1106                 int pid;
1107                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1108
1109                 if (pid != 0) {
1110 #endif
1111
1112                 string thisOutputDir = outputDir;
1113                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1114                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
1115                 remove(outputFileName.c_str());
1116
1117
1118                 for (int i = 0; i < distNames.size(); i++) {
1119                         if (m->control_pressed) {  return 0; }
1120
1121                         string thisDistFile = distNames[i].begin()->first;
1122
1123                         m->appendFiles(thisDistFile, outputFileName);
1124                 }
1125
1126                 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1127
1128 #ifdef USE_MPI
1129                 }
1130 #endif
1131
1132                 return 0;
1133
1134
1135         }
1136         catch(exception& e) {
1137                 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1138                 exit(1);
1139         }
1140 }
1141 //**********************************************************************************************************************