2 * clustersplitcommand.cpp
5 * Created by westcott on 5/19/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "clustersplitcommand.h"
13 //**********************************************************************************************************************
// Declares every parameter the cluster.split command understands, appends each
// one to the `parameters` member, and collects the parameter names in myArray.
14 vector<string> ClusterSplitCommand::setParameters(){
// Input-file parameters (all declared with the "InputTypes" kind string).
16 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName",false,false); parameters.push_back(ptaxonomy);
17 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none",false,false); parameters.push_back(pphylip);
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
19 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
20 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
// Splitting / clustering options. The fourth argument of each declaration
// matches the default that the option-parsing constructor applies when the
// user omits the parameter (taxlevel 3, cutoff 0.25, precision 100, ...).
21 CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel);
22 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
23 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
24 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
25 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
26 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
27 CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff);
28 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
// NOTE(review): "weighted" is offered here, but the option-parsing constructor
// only accepts furthest, nearest or average -- confirm which is intended.
29 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
30 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
31 CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic);
// Directory overrides.
32 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
33 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Flatten the registered parameters into a plain list of their names.
35 vector<string> myArray;
36 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exceptions are reported together with the class and function name.
40 m->errorOut(e, "ClusterSplitCommand", "setParameters");
44 //**********************************************************************************************************************
// Builds the help text shown for the cluster.split command, one sentence per
// appended line. The text describes the three ways of splitting (distance,
// classify, fasta), every parameter with its default, and the call format.
// Exceptions are reported through m->errorOut with this function's name.
45 string ClusterSplitCommand::getHelpString(){
47 string helpString = "";
// Option list is kept in sync with the parameters declared in setParameters
// (inputdir/outputdir excluded).
48 helpString += "The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, classic, processors. Fasta or Phylip or column and name are required.\n";
49 helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
50 helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
51 helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify. \n";
52 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequences into distinct taxonomy groups, and split the distance file based on those groups. \n";
53 helpString += "For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file. \n";
54 helpString += "You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n";
55 helpString += "The phylip and column parameters allow you to enter your distance file. \n";
56 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
57 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
58 helpString += "The cutoff parameter allows you to set the distance you want to cluster to, default is 0.25. \n";
59 helpString += "The precision parameter allows you to specify the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
60 helpString += "The method parameter allows you to specify what clustering algorithm you want to use, default=average, options furthest, nearest, or average. \n";
61 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
// The fasta split also needs a taxonomy file (the constructor requires one
// whenever a fasta file is supplied), so both split methods are named here.
62 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify or splitmethod=fasta. Be sure your taxonomy file does not include the probability scores. \n";
// NOTE(review): wording assumes taxlevel counts taxa from the start of each
// taxonomy string, so the default of 3 selects the third one -- confirm.
63 helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the third taxon in each list. \n";
64 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n";
65 helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic. It is only valid with splitmethod=fasta. Default=f.\n";
67 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
69 helpString += "The cluster.split command should be in the following format: \n";
70 helpString += "cluster.split(column=yourDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n";
71 helpString += "Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n";
75 m->errorOut(e, "ClusterSplitCommand", "getHelpString");
79 //**********************************************************************************************************************
// Maps an output type name to the tag used when naming that kind of output
// file: "list" -> "list", "rabund" -> "rabund", "sabund" -> "sabund",
// "column" -> "dist".
//   type      - output type to look up.
//   inputName - not referenced by the statements below.
// Returns outputFileName, which stays empty when no tag was assigned.
80 string ClusterSplitCommand::getOutputFileNameTag(string type, string inputName=""){
82 string outputFileName = "";
83 map<string, vector<string> >::iterator it;
85 //is this a type this command creates
86 it = outputTypes.find(type);
87 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
89 if (type == "list") { outputFileName = "list"; }
90 else if (type == "rabund") { outputFileName = "rabund"; }
91 else if (type == "sabund") { outputFileName = "sabund"; }
92 else if (type == "column") { outputFileName = "dist"; }
// A type with no tag defined is logged and raises the control_pressed flag.
93 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
95 return outputFileName;
98 m->errorOut(e, "ClusterSplitCommand", "getOutputFileNameTag");
102 //**********************************************************************************************************************
// Default constructor: marks the command as aborted and as a help call, and
// registers an empty file list for each output type (list, rabund, sabund,
// column).
103 ClusterSplitCommand::ClusterSplitCommand(){
105 abort = true; calledHelp = true;
// One shared empty vector is copied into each outputTypes entry.
107 vector<string> tempOutNames;
108 outputTypes["list"] = tempOutNames;
109 outputTypes["rabund"] = tempOutNames;
110 outputTypes["sabund"] = tempOutNames;
111 outputTypes["column"] = tempOutNames;
113 catch(exception& e) {
114 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
118 //**********************************************************************************************************************
119 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
// Option-parsing constructor.
//   option - the raw option string; "help" and "citation" are handled specially.
// Validates the parameter names, resolves the input files (prefixing inputdir
// when a file was given without a path), falls back to mothur's current files
// where nothing was supplied, and reads the optional settings with their
// defaults. Problems are reported through m->mothurOut and flagged via `abort`.
120 ClusterSplitCommand::ClusterSplitCommand(string option) {
122 abort = false; calledHelp = false;
125 //allow user to run help
126 if(option == "help") { help(); abort = true; calledHelp = true; }
127 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of all legal parameters, used for the validity check below.
130 vector<string> myArray = setParameters();
132 OptionParser parser(option);
133 map<string,string> parameters = parser.getParameters();
135 ValidParameters validParameter("cluster.split");
137 //check to make sure all parameters are valid for command
138 map<string,string>::iterator it;
139 for (it = parameters.begin(); it != parameters.end(); it++) {
140 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
145 //initialize outputTypes
146 vector<string> tempOutNames;
147 outputTypes["list"] = tempOutNames;
148 outputTypes["rabund"] = tempOutNames;
149 outputTypes["sabund"] = tempOutNames;
150 outputTypes["column"] = tempOutNames;
152 //if the user changes the output directory command factory will send this info to us in the output parameter
153 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
155 //if the user changes the input directory command factory will send this info to us in the output parameter
156 string inputDir = validParameter.validFile(parameters, "inputdir", false);
157 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter the user supplied, prefix inputDir when the value
// carries no path of its own.
160 it = parameters.find("phylip");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["phylip"] = inputDir + it->second; }
168 it = parameters.find("column");
169 //user has given a template file
170 if(it != parameters.end()){
171 path = m->hasPath(it->second);
172 //if the user has not given a path then, add inputdir. else leave path alone.
173 if (path == "") { parameters["column"] = inputDir + it->second; }
176 it = parameters.find("name");
177 //user has given a template file
178 if(it != parameters.end()){
179 path = m->hasPath(it->second);
180 //if the user has not given a path then, add inputdir. else leave path alone.
181 if (path == "") { parameters["name"] = inputDir + it->second; }
184 it = parameters.find("taxonomy");
185 //user has given a template file
186 if(it != parameters.end()){
187 path = m->hasPath(it->second);
188 //if the user has not given a path then, add inputdir. else leave path alone.
189 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
192 it = parameters.find("fasta");
193 //user has given a template file
194 if(it != parameters.end()){
195 path = m->hasPath(it->second);
196 //if the user has not given a path then, add inputdir. else leave path alone.
197 if (path == "") { parameters["fasta"] = inputDir + it->second; }
// validFile yields "not open" (abort) or "not found" (treated as not supplied);
// any other value is taken as the file name and recorded as the current file
// of that kind.
201 //check for required parameters
202 phylipfile = validParameter.validFile(parameters, "phylip", true);
203 if (phylipfile == "not open") { abort = true; }
204 else if (phylipfile == "not found") { phylipfile = ""; }
205 else { distfile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
207 columnfile = validParameter.validFile(parameters, "column", true);
208 if (columnfile == "not open") { abort = true; }
209 else if (columnfile == "not found") { columnfile = ""; }
210 else { distfile = columnfile; format = "column"; m->setColumnFile(columnfile); }
212 namefile = validParameter.validFile(parameters, "name", true);
213 if (namefile == "not open") { abort = true; }
214 else if (namefile == "not found") { namefile = ""; }
215 else { m->setNameFile(namefile); }
// Supplying a fasta file forces splitmethod to "fasta".
217 fastafile = validParameter.validFile(parameters, "fasta", true);
218 if (fastafile == "not open") { abort = true; }
219 else if (fastafile == "not found") { fastafile = ""; }
220 else { distfile = fastafile; splitmethod = "fasta"; m->setFastaFile(fastafile); }
// Supplying a taxonomy file selects "classify" unless "fasta" is already set.
222 taxFile = validParameter.validFile(parameters, "taxonomy", true);
223 if (taxFile == "not open") { taxFile = ""; abort = true; }
224 else if (taxFile == "not found") { taxFile = ""; }
225 else { m->setTaxonomyFile(taxFile); if (splitmethod != "fasta") { splitmethod = "classify"; } }
// No distance or fasta input given: try mothur's current files instead.
227 if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) {
228 //is there are current file available for either of these?
229 //give priority to column, then phylip, then fasta
230 columnfile = m->getColumnFile();
231 if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
233 phylipfile = m->getPhylipFile();
234 if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
236 fastafile = m->getFastaFile();
237 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
239 m->mothurOut("No valid current files. When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine();
// NOTE(review): this condition is true only when all three inputs are given;
// supplying exactly two of fasta/phylip/column is not rejected here even though
// the message says ONLY ONE -- confirm intent.
245 else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
// Column-format distances need a name file; fall back to the current one.
247 if (columnfile != "") {
248 if (namefile == "") {
249 namefile = m->getNameFile();
250 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
252 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
// A fasta-based split needs both a taxonomy file and a name file.
258 if (fastafile != "") {
260 taxFile = m->getTaxonomyFile();
261 if (taxFile != "") { m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
263 m->mothurOut("You need to provide a taxonomy file if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
268 if (namefile == "") {
269 namefile = m->getNameFile();
270 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
272 m->mothurOut("You need to provide a namefile if you are if you are using a fasta file to generate the split."); m->mothurOutEndLine();
278 //check for optional parameter and set defaults
279 // ...at some point should added some additional type checking...
280 //get user cutoff and precision or use defaults
282 temp = validParameter.validFile(parameters, "precision", false);
283 if (temp == "not found") { temp = "100"; }
// `length` is the character count of the precision string; completeListFile
// and mergeLists pass length-1 to toString when formatting labels.
284 //saves precision legnth for formatting below
285 length = temp.length();
286 m->mothurConvert(temp, precision);
288 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
289 hard = m->isTrue(temp);
291 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
292 large = m->isTrue(temp);
// The processors value is also stored globally through m->setProcessors.
294 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
295 m->setProcessors(temp);
296 m->mothurConvert(temp, processors);
// A user-supplied splitmethod is honoured only when the input files have not
// already fixed it to "fasta" or "classify"; the default is "distance".
298 temp = validParameter.validFile(parameters, "splitmethod", false);
299 if ((splitmethod != "fasta") && (splitmethod != "classify")) {
300 if (temp == "not found") { splitmethod = "distance"; }
301 else { splitmethod = temp; }
304 temp = validParameter.validFile(parameters, "classic", false); if (temp == "not found") { temp = "F"; }
305 classic = m->isTrue(temp);
307 if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; }
// The cutoff is raised by 5 / (precision * 10), e.g. 0.005 at precision 100.
309 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.25"; }
310 m->mothurConvert(temp, cutoff);
311 cutoff += (5 / (precision * 10.0));
313 temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "3"; }
314 m->mothurConvert(temp, taxLevelCutoff);
316 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "average"; }
// NOTE(review): setParameters lists "weighted" as a method option, but it is
// rejected here -- confirm which is intended.
318 if ((method == "furthest") || (method == "nearest") || (method == "average")) { m->mothurOut("Using splitmethod " + splitmethod + ".\n"); }
319 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
321 if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
322 else { m->mothurOut(splitmethod + " is not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
324 if ((splitmethod == "classify") && (taxFile == "")) { m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true; }
// showabund and timing are kept as the raw "T"/"F" style strings.
326 showabund = validParameter.validFile(parameters, "showabund", false);
327 if (showabund == "not found") { showabund = "T"; }
329 timing = validParameter.validFile(parameters, "timing", false);
330 if (timing == "not found") { timing = "F"; }
334 catch(exception& e) {
335 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
340 //**********************************************************************************************************************
// Runs the command: optionally converts a phylip matrix to column format,
// splits the input into independent distance/name file pairs, clusters each
// pair (spread over MPI processes or via createProcesses), then merges the
// per-split list files and records the resulting list/rabund/sabund files as
// mothur's current files.
342 int ClusterSplitCommand::execute(){
// Aborted commands return 0 when help/citation was requested, otherwise 2.
345 if (abort == true) { if (calledHelp) { return 0; } return 2; }
348 vector<string> listFileNames;
350 string singletonName = "";
// Remembered so a cutoff change made while clustering can be reported later.
351 double saveCutoff = cutoff;
353 //****************** file prep work ******************************//
358 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
359 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
361 if (pid == 0) { //only process 0 converts and splits
365 //if user gave a phylip file convert to column file
366 if (format == "phylip") {
368 m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
370 ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
372 NameAssignment* nameMap = NULL;
373 convert->setFormat("phylip");
374 convert->read(nameMap);
376 if (m->control_pressed) { delete convert; return 0; }
// From here on distfile refers to the converter's output file.
378 distfile = convert->getOutputFile();
380 //if no names file given with phylip file, create it
381 ListVector* listToMakeNameFile = convert->getListVector();
382 if (namefile == "") { //you need to make a namefile for split matrix
384 namefile = phylipfile + ".names";
385 m->openOutputFile(namefile, out);
// Each bin is written as both columns of its name-file line.
386 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
387 string bin = listToMakeNameFile->get(i);
388 out << bin << '\t' << bin << endl;
392 delete listToMakeNameFile;
395 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
397 if (m->control_pressed) { return 0; }
400 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
// The distance method splits on the distance cutoff; classify and fasta split
// on the taxonomy level instead.
402 //split matrix into non-overlapping groups
404 if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); }
405 else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); }
406 else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir); }
407 else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; }
411 if (m->control_pressed) { delete split; return 0; }
413 singletonName = split->getSingletonNames();
414 vector< map<string, string> > distName = split->getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
417 //output a merged distance file
418 if (splitmethod == "fasta") { createMergedDistanceFile(distName); }
421 if (m->control_pressed) { return 0; }
423 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
426 //****************** break up files between processes and cluster each file set ******************************//
428 ////you are process 0 from above////
430 vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
431 dividedNames.resize(processors);
// Round-robin: file i ends up in dividedNames[i % processors].
433 //for each file group figure out which process will complete it
434 //want to divide the load intelligently so the big files are spread between processes
435 for (int i = 0; i < distName.size(); i++) {
436 int processToAssign = (i+1) % processors;
437 if (processToAssign == 0) { processToAssign = processors; }
439 dividedNames[(processToAssign-1)].push_back(distName[i]);
// NOTE(review): (i+1) % processors is non-zero for every i except the last, so
// every list but the last one is reversed, not "every other" -- confirm intent.
442 //not lets reverse the order of ever other process, so we balance big files running with little ones
443 for (int i = 0; i < processors; i++) {
444 int remainder = ((i+1) % processors);
445 if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); }
449 //send each child the list of files it needs to process
450 for(int i = 1; i < processors; i++) {
451 //send number of file pairs
452 int num = dividedNames[i].size();
453 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
// Every file name travels as its length followed by a fixed 1024-char buffer.
// NOTE(review): strcpy is unchecked; a name of 1024 or more characters would
// overflow the buffer.
455 for (int j = 0; j < num; j++) { //send filenames to process i
456 char tempDistFileName[1024];
457 strcpy(tempDistFileName, (dividedNames[i][j].begin()->first).c_str());
458 int lengthDist = (dividedNames[i][j].begin()->first).length();
460 MPI_Send(&lengthDist, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
461 MPI_Send(tempDistFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
463 char tempNameFileName[1024];
464 strcpy(tempNameFileName, (dividedNames[i][j].begin()->second).c_str());
465 int lengthName = (dividedNames[i][j].begin()->second).length();
467 MPI_Send(&lengthName, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
468 MPI_Send(tempNameFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
// Process 0 clusters its own share of the files.
473 listFileNames = cluster(dividedNames[0], labels);
475 //receive the other processes info
476 for(int i = 1; i < processors; i++) {
477 int num = dividedNames[i].size();
// Keep the smallest cutoff reported by any process.
480 MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
481 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
483 //send list filenames to root process
484 for (int j = 0; j < num; j++) {
486 char tempListFileName[1024];
488 MPI_Recv(&lengthList, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
489 MPI_Recv(tempListFileName, 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
// Trim the received buffer to the transmitted length.
491 string myListFileName = tempListFileName;
492 myListFileName = myListFileName.substr(0, lengthList);
494 listFileNames.push_back(myListFileName);
497 //send Labels to root process
499 MPI_Recv(&numLabels, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
501 for (int j = 0; j < numLabels; j++) {
505 MPI_Recv(&lengthLabel, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
506 MPI_Recv(tempLabel, 100, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
508 string myLabel = tempLabel;
509 myLabel = myLabel.substr(0, lengthLabel);
// Merge the child's labels into the root's label set.
511 if (labels.count(myLabel) == 0) { labels.insert(myLabel); }
515 }else { //you are a child process
516 vector < map<string, string> > myNames;
518 //recieve the files you need to process
519 //receive number of file pairs
521 MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
525 for (int j = 0; j < num; j++) { //receive filenames to process
527 char tempDistFileName[1024];
529 MPI_Recv(&lengthDist, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
530 MPI_Recv(tempDistFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
532 string myDistFileName = tempDistFileName;
533 myDistFileName = myDistFileName.substr(0, lengthDist);
536 char tempNameFileName[1024];
538 MPI_Recv(&lengthName, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
539 MPI_Recv(tempNameFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
541 string myNameFileName = tempNameFileName;
542 myNameFileName = myNameFileName.substr(0, lengthName);
// Entry j maps the distance file to its name file.
545 myNames[j][myDistFileName] = myNameFileName;
549 listFileNames = cluster(myNames, labels);
// Report this process's cutoff, list file names and labels back to process 0.
552 MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
554 //send list filenames to root process
555 for (int j = 0; j < num; j++) {
556 char tempListFileName[1024];
557 strcpy(tempListFileName, listFileNames[j].c_str());
558 int lengthList = listFileNames[j].length();
560 MPI_Send(&lengthList, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
561 MPI_Send(tempListFileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
564 //send Labels to root process
565 int numLabels = labels.size();
566 MPI_Send(&numLabels, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
568 for(set<string>::iterator it = labels.begin(); it != labels.end(); ++it) {
570 strcpy(tempLabel, (*it).c_str());
571 int lengthLabel = (*it).length();
573 MPI_Send(&lengthLabel, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
574 MPI_Send(tempLabel, 100, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
579 MPI_Barrier(MPI_COMM_WORLD);
582 ///////////////////// WINDOWS CAN ONLY USE 1 PROCESSORS ACCESS VIOLATION UNRESOLVED ///////////////////////
// Never use more processors than there are files to cluster.
584 if (processors > distName.size()) { processors = distName.size(); }
// On unix-like platforms the files are clustered either directly or through
// createProcesses (presumably when more than one processor is in use --
// confirm); other platforms always cluster directly.
586 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
588 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
590 listFileNames = createProcesses(distName, labels);
593 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
// On interruption, delete the list files produced so far.
596 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } return 0; }
598 if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); }
600 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
602 //****************** merge list file and create rabund and sabund files ******************************//
604 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
607 if (pid == 0) { //only process 0 merges
// listSingle is passed by reference and assigned inside completeListFile.
610 ListVector* listSingle;
611 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
613 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
615 mergeLists(listFileNames, labelBins, listSingle);
617 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
619 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
// The first file recorded for each output type becomes the current file.
621 //set list file as new current listfile
623 itTypes = outputTypes.find("list");
624 if (itTypes != outputTypes.end()) {
625 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
628 //set rabund file as new current rabundfile
629 itTypes = outputTypes.find("rabund");
630 if (itTypes != outputTypes.end()) {
631 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
634 //set sabund file as new current sabundfile
635 itTypes = outputTypes.find("sabund");
636 if (itTypes != outputTypes.end()) {
637 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
640 m->mothurOutEndLine();
641 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
642 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
643 m->mothurOutEndLine();
646 } //only process 0 merges
649 MPI_Barrier(MPI_COMM_WORLD);
654 catch(exception& e) {
655 m->errorOut(e, "ClusterSplitCommand", "execute");
659 //**********************************************************************************************************************
// Rewrites every per-split list file so that it holds one entry for each
// requested label that does not exceed the cutoff, and tallies how many bins
// each label will have once everything is merged.
//   listNames  - list files produced by clustering the individual splits.
//   singleton  - file of singleton sequences, or "none" when there is none.
//   userLabels - labels collected while clustering.
//   listSingle - out-parameter: set to a new ListVector holding the singletons,
//                or to NULL when singleton is "none".
// The tally is kept in labelBin (label as float -> bin count); the caller in
// execute treats the result as a map of label to number of bins.
660 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
663 map<float, int> labelBin;
664 vector<float> orderFloat;
// Load the singletons: the second column of each line becomes one bin, and the
// singleton file is removed afterwards.
668 if (singleton != "none") {
670 m->openInputFile(singleton, in);
672 string firstCol, secondCol;
673 listSingle = new ListVector();
675 in >> firstCol >> secondCol; m->gobble(in);
676 listSingle->push_back(secondCol);
679 m->mothurRemove(singleton);
681 numSingleBins = listSingle->getNumBins();
682 }else{ listSingle = NULL; numSingleBins = 0; }
684 //go through users set and make them floats so we can sort them
685 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
// "unique" is represented by -1 so that it sorts ahead of numeric labels.
688 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) { convert(*it, temp); }
689 else if (*it == "unique") { temp = -1.0; }
// Labels beyond the cutoff are dropped; the rest start at the singleton count.
691 if (temp <= cutoff) {
692 orderFloat.push_back(temp);
693 labelBin[temp] = numSingleBins; //initialize numbins
698 sort(orderFloat.begin(), orderFloat.end());
701 //get the list info from each file
702 for (int k = 0; k < listNames.size(); k++) {
// On interruption, free the singleton list and delete the list files.
704 if (m->control_pressed) {
705 if (listSingle != NULL) { delete listSingle; listSingle = NULL; m->mothurRemove(singleton); }
706 for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]); }
710 InputData* input = new InputData(listNames[k], "list");
711 ListVector* list = input->getListVector();
712 string lastLabel = list->getLabel();
// The completed version is written to a temporary file beside the original.
714 string filledInList = listNames[k] + "filledInTemp";
716 m->openOutputFile(filledInList, outFilled);
718 //for each label needed
719 for(int l = 0; l < orderFloat.size(); l++){
// Turn the float back into the label text; length-1 is handed to toString
// (presumably the number of decimal places -- confirm).
722 if (orderFloat[l] == -1) { thisLabel = "unique"; }
723 else { thisLabel = toString(orderFloat[l], length-1); }
725 //this file has reached the end
727 list = input->getListVector(lastLabel, true);
728 }else{ //do you have the distance, or do you need to fill in
731 if (list->getLabel() == "unique") { labelFloat = -1.0; }
732 else { convert(list->getLabel(), labelFloat); }
734 //check for missing labels
735 if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
736 //if its bigger get last label, otherwise keep it
738 list = input->getListVector(lastLabel, true); //get last list vector to use, you actually want to move back in the file
740 lastLabel = list->getLabel();
// Emit the chosen list under the requested label and add its bins to the tally.
744 list->setLabel(thisLabel);
745 list->print(outFilled);
748 labelBin[orderFloat[l]] += list->getNumBins();
752 list = input->getListVector();
755 if (list != NULL) { delete list; }
// Replace the original list file with the filled-in copy.
759 m->mothurRemove(listNames[k]);
760 rename(filledInList.c_str(), listNames[k].c_str());
765 catch(exception& e) {
766 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
770 //**********************************************************************************************************************
// Writes the merged list, rabund and sabund output files. For every label it
// emits the singleton bins followed by the bins found under that label in each
// per-split list file.
//   listNames  - per-split list files (removed once merging is done).
//   userLabels - label (as float, -1 meaning "unique") -> total bin count.
//   listSingle - singleton bins, or NULL; deleted by this function.
771 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
// Without an explicit output directory, write next to the distance file.
773 if (outputDir == "") { outputDir += m->hasPath(distfile); }
774 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
776 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
777 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
778 string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
780 m->openOutputFile(sabundFileName, outSabund);
781 m->openOutputFile(rabundFileName, outRabund);
782 m->openOutputFile(listFileName, outList);
// Record the outputs both in the flat name list and under their type.
784 outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
785 outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
786 outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
787 map<float, int>::iterator itLabel;
789 //for each label needed
790 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
793 if (itLabel->first == -1) { thisLabel = "unique"; }
794 else { thisLabel = toString(itLabel->first, length-1); }
// Every list line opens with the label and the total number of bins.
796 outList << thisLabel << '\t' << itLabel->second << '\t';
798 RAbundVector* rabund = new RAbundVector();
799 rabund->setLabel(thisLabel);
// Singleton bins go first; each bin's abundance is m->getNumNames(bin).
802 if (listSingle != NULL) {
803 for (int j = 0; j < listSingle->getNumBins(); j++) {
804 outList << listSingle->get(j) << '\t';
805 rabund->push_back(m->getNumNames(listSingle->get(j)));
809 //get the list info from each file
810 for (int k = 0; k < listNames.size(); k++) {
// On interruption, release what this function owns and delete the list files.
812 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]); } delete rabund; return 0; }
814 InputData* input = new InputData(listNames[k], "list");
815 ListVector* list = input->getListVector(thisLabel);
817 //this file has reached the end
818 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine(); }
820 for (int j = 0; j < list->getNumBins(); j++) {
821 outList << list->get(j) << '\t';
822 rabund->push_back(m->getNumNames(list->get(j)));
// The sabund line is derived from the rabund vector built for this label.
829 SAbundVector sabund = rabund->getSAbundVector();
831 sabund.print(outSabund);
832 rabund->print(outRabund);
// Final cleanup: free the singleton list and remove the per-split list files.
842 if (listSingle != NULL) { delete listSingle; }
844 for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]); }
848 catch(exception& e) {
849 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
854 //**********************************************************************************************************************
//Prints one list vector together with the abundance vectors derived from it.
//The rabund goes to outRabund, the sabund to outSabund and the list itself to outList.
//When showabund is true the sabund is also printed to cout.
//  oldList - list vector to print; it is only read through getLabel, getRAbundVector and print.
856 void ClusterSplitCommand::printData(ListVector* oldList){
858 string label = oldList->getLabel();
859 RAbundVector oldRAbund = oldList->getRAbundVector();
//give the derived rabund the same label as the list it came from
861 oldRAbund.setLabel(label);
//optional echo of the sabund to the screen
862 if (m->isTrue(showabund)) {
863 oldRAbund.getSAbundVector().print(cout);
865 oldRAbund.print(outRabund);
866 oldRAbund.getSAbundVector().print(outSabund);
868 oldList->print(outList);
870 catch(exception& e) {
871 m->errorOut(e, "ClusterSplitCommand", "printData");
875 //**********************************************************************************************************************
//Splits the distance/name file groups in distName between `processors` workers, runs
//cluster() on every share and gathers the results.
//  distName - one map per split; cluster() reads the first entry of each map
//             (key = distance file, value = name file).
//  labels   - in/out: every label produced by any worker is added to this set.
//Returns the names of all list files produced. The member cutoff is lowered to the
//smallest cutoff reported by any worker.
//On the Unix-like branch the workers hand their results back through the files
//"<pid>.temp" and "<pid>.temp.labels"; on the other branch worker threads hand them
//back through clusterData structs.
876 vector<string> ClusterSplitCommand::createProcesses(vector< map<string, string> > distName, set<string>& labels){
879 vector<string> listFiles;
880 vector < vector < map<string, string> > > dividedNames; //dividedNames[1] = vector of filenames for process 1...
881 dividedNames.resize(processors);
883 //for each file group figure out which process will complete it
884 //want to divide the load intelligently so the big files are spread between processes
885 for (int i = 0; i < distName.size(); i++) {
//round robin: group i lands in dividedNames[i % processors]
887 int processToAssign = (i+1) % processors;
888 if (processToAssign == 0) { processToAssign = processors; }
890 dividedNames[(processToAssign-1)].push_back(distName[i]);
//NOTE(review): prints the distance file name of every group given to worker index 1 only; looks like leftover debug output - confirm
891 if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); }
894 //now let's reverse the order of every other process, so we balance big files running with little ones
//NOTE(review): (i+1) % processors is non-zero for every i except the last one, so all shares but the last are reversed, not every other one - confirm the intent
895 for (int i = 0; i < processors; i++) {
897 int remainder = ((i+1) % processors);
898 if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); }
901 if (m->control_pressed) { return listFiles; }
903 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
907 //loop through and create all the processes you want
//the call that creates the child process and the branches on its result are on lines not shown in this view
908 while (process != processors) {
912 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
//worker side: cluster this worker's share
916 vector<string> listFileNames = cluster(dividedNames[process], labels);
918 //write out names to file
919 string filename = toString(getpid()) + ".temp";
921 m->openOutputFile(filename, out);
923 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl; }
//second file: this worker's cutoff on the first line, then one label per line
928 filename = toString(getpid()) + ".temp.labels";
929 m->openOutputFile(filename, outLabels);
931 outLabels << cutoff << endl;
932 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
933 outLabels << (*it) << endl;
//a worker could not be started: report it and interrupt the workers created so far
939 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
940 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
//the calling process clusters the first share itself
946 listFiles = cluster(dividedNames[0], labels);
948 //force parent to wait until all the processes are done
949 for (int i=0;i< processIDS.size();i++) {
950 int temp = processIDS[i];
954 //get list of list file names from each process
955 for(int i=0;i<processIDS.size();i++){
956 string filename = toString(processIDS[i]) + ".temp";
958 m->openInputFile(filename, in);
//the first token of the file is stored in tag (the line that writes it is not shown in this view)
960 in >> tag; m->gobble(in);
964 in >> tempName; m->gobble(in);
965 listFiles.push_back(tempName);
968 m->mothurRemove((toString(processIDS[i]) + ".temp"));
971 filename = toString(processIDS[i]) + ".temp.labels";
973 m->openInputFile(filename, in2);
//first value is the worker's cutoff; keep the smallest cutoff seen
976 in2 >> tempCutoff; m->gobble(in2);
977 if (tempCutoff < cutoff) { cutoff = tempCutoff; }
//remaining values are labels; add them to the caller's set
981 in2 >> tempName; m->gobble(in2);
982 if (labels.count(tempName) == 0) { labels.insert(tempName); }
985 m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
991 //////////////////////////////////////////////////////////////////////////////////////////////////////
992 //Windows version shared memory, so be careful when passing variables through the clusterData struct.
993 //Above fork() will clone, so memory is separate, but that's not the case with windows,
994 //Taking advantage of shared memory to allow both threads to add labels.
995 //////////////////////////////////////////////////////////////////////////////////////////////////////
997 vector<clusterData*> pDataArray;
//NOTE(review): arrays sized by a runtime value are not standard C++, and the size is 0 when processors is 1 - confirm the compilers used accept this
998 DWORD dwThreadIdArray[processors-1];
999 HANDLE hThreadArray[processors-1];
1001 //Create processor worker threads.
//thread i works on dividedNames[i]; its data sits at pDataArray[i-1] and its handle at hThreadArray[i-1]
1002 for( int i=1; i<processors; i++ ){
1003 // Allocate memory for thread data.
1004 clusterData* tempCluster = new clusterData(dividedNames[i], m, cutoff, method, outputDir, hard, precision, length, i);
1005 pDataArray.push_back(tempCluster);
1006 processIDS.push_back(i);
1008 //MyClusterThreadFunction is in header. It must be global or static to work with the threads.
1009 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1010 hThreadArray[i-1] = CreateThread(NULL, 0, MyClusterThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
//the calling thread clusters the first share itself
1015 listFiles = cluster(dividedNames[0], labels);
1017 //Wait until all threads have terminated.
1018 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1020 //Close all thread handles and free memory allocations.
1021 for(int i=0; i < pDataArray.size(); i++){
1023 tag = pDataArray[i]->tag;
1024 //get listfiles created
1025 for(int j=0; j < pDataArray[i]->listFiles.size(); j++){ listFiles.push_back(pDataArray[i]->listFiles[j]); }
//merge the thread's labels into the caller's set
1027 set<string>::iterator it;
1028 for(it = pDataArray[i]->labels.begin(); it != pDataArray[i]->labels.end(); it++){ labels.insert(*it); }
//keep the smallest cutoff seen
1030 if (pDataArray[i]->cutoff < cutoff) { cutoff = pDataArray[i]->cutoff; }
1031 CloseHandle(hThreadArray[i]);
1032 delete pDataArray[i];
1040 catch(exception& e) {
1041 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
1045 //**********************************************************************************************************************
//Clusters every distance file in distNames and returns the names of the list files produced.
//  distNames - one map per split; only the first entry of each map is used
//              (key = distance file, value = name file).
//  labels    - in/out set of labels, handed on to clusterClassicFile / clusterFile.
//After the loop the member cutoff is set to smallestCutoff, which starts at cutoff
//and is handed to both helpers by reference.
//If the run is interrupted, the list files created so far are removed and an empty vector is returned.
1047 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
1050 vector<string> listFileNames;
1051 double smallestCutoff = cutoff;
1053 //cluster each distance file
1054 for (int i = 0; i < distNames.size(); i++) {
1056 string thisNamefile = distNames[i].begin()->second;
1057 string thisDistFile = distNames[i].begin()->first;
//the classic flag selects which of the two clustering helpers handles the file
1059 string listFileName = "";
1060 if (classic) { listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff); }
1061 else { listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff); }
1063 if (m->control_pressed) { //clean up
1064 for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); }
1065 listFileNames.clear(); return listFileNames;
1068 listFileNames.push_back(listFileName);
//publish the smallest cutoff through the member variable
1071 cutoff = smallestCutoff;
1073 return listFileNames;
1076 catch(exception& e) {
1077 m->errorOut(e, "ClusterSplitCommand", "cluster");
1083 //**********************************************************************************************************************
//Clusters one phylip distance file with ClusterClassic and writes the resulting list file.
//  thisDistFile   - phylip distance file to read.
//  thisNamefile   - name file used to build the NameAssignment.
//  labels         - in/out: every label written to the list file is added to this set.
//  smallestCutoff - not touched by any line visible in this view.
//Returns the name of the list file (fileroot + tag + ".list"). If the run is interrupted
//before that name has been built, the still empty listFileName is returned.
1084 string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
1086 string listFileName = "";
1088 ListVector* list = NULL;
1090 RAbundVector* rabund = NULL;
1094 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1096 //output your files too
1098 cout << endl << "Reading " << thisDistFile << endl;
1102 m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
1104 NameAssignment* nameMap = new NameAssignment(thisNamefile);
1107 //reads phylip file storing data in 2D vector, also fills list and rabund
1109 ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
1110 cluster->readPhylipFile(thisDistFile, nameMap);
1111 tag = cluster->getTag();
//interrupted while reading: release both allocations and return the (empty) file name.
//Returning 0 here would build the string from a null pointer, which is undefined behaviour.
1113 if (m->control_pressed) { delete cluster; delete nameMap; return listFileName; }
1115 list = cluster->getListVector();
1116 rabund = cluster->getRAbundVector();
//the list file is named after the distance file's path/root name plus the cluster tag
1118 if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1119 fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1120 listFileName = fileroot+ tag + ".list";
1123 m->openOutputFile(fileroot+ tag + ".list", listFile);
1125 float previousDist = 0.00000;
1126 float rndPreviousDist = 0.00000;
1130 //output your files too
1132 cout << endl << "Clustering " << thisDistFile << endl;
1136 m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
//keep merging while the smallest remaining distance is below the cutoff and more than one sequence is left
1138 while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
//interrupted while clustering: nameMap is still alive here, so it is released with the rest
1139 if (m->control_pressed) { delete cluster; delete nameMap; delete list; delete rabund; listFile.close(); return listFileName; }
1141 cluster->update(cutoff);
1143 float dist = cluster->getSmallDist();
//the distance is either rounded up or rounded to precision; the condition choosing between the two is on a line not shown in this view
1146 rndDist = m->ceilDist(dist, precision);
1148 rndDist = m->roundDist(dist, precision);
//the first time the distance moves away from zero, the previous state is written as "unique"
1151 if(previousDist <= 0.0000 && dist != previousDist){
1152 oldList.setLabel("unique");
1153 oldList.print(listFile);
1154 if (labels.count("unique") == 0) { labels.insert("unique"); }
//otherwise the previous state is written whenever the rounded distance changes, labelled with the previous rounded distance
1156 else if(rndDist != rndPreviousDist){
1157 oldList.setLabel(toString(rndPreviousDist, length-1));
1158 oldList.print(listFile);
1159 if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
1163 previousDist = dist;
1164 rndPreviousDist = rndDist;
//after the loop: write the state that was still pending
1168 if(previousDist <= 0.0000){
1169 oldList.setLabel("unique");
1170 oldList.print(listFile);
1171 if (labels.count("unique") == 0) { labels.insert("unique"); }
1173 else if(rndPreviousDist<cutoff){
1174 oldList.setLabel(toString(rndPreviousDist, length-1));
1175 oldList.print(listFile);
1176 if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
1182 delete cluster; delete nameMap; delete list; delete rabund;
1185 return listFileName;
1188 catch(exception& e) {
1189 m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile");
1194 //**********************************************************************************************************************
//Clusters one column-format distance file with the linkage selected by method
//("furthest", "nearest" or "average") and writes the resulting list file.
//  thisDistFile   - column distance file to read; removed once clustering has finished.
//  thisNamefile   - name file used to build the NameAssignment; removed as well.
//  labels         - in/out: every label written to the list file is added to this set.
//  smallestCutoff - in/out: lowered to this file's final cutoff when that is smaller.
//Returns the name of the list file (fileroot + tag + ".list"); the name is empty when the
//run is interrupted before it has been built.
1195 string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
1197 string listFileName = "";
1199 Cluster* cluster = NULL;
1200 SparseMatrix* matrix = NULL;
1201 ListVector* list = NULL;
1203 RAbundVector* rabund = NULL;
1205 if (m->control_pressed) { return listFileName; }
1209 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1211 //output your files too
1213 cout << endl << "Reading " << thisDistFile << endl;
1217 m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
//read the distances, with the cutoff handed to the reader first
1219 ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
1220 read->setCutoff(cutoff);
1222 NameAssignment* nameMap = new NameAssignment(thisNamefile);
1224 read->read(nameMap);
1226 if (m->control_pressed) { delete read; delete nameMap; return listFileName; }
//list and matrix are taken from the reader before it is deleted; both are deleted by this function further down
1228 list = read->getListVector();
1230 matrix = read->getMatrix();
1232 delete read; read = NULL;
1233 delete nameMap; nameMap = NULL;
1237 //output your files too
1239 cout << endl << "Clustering " << thisDistFile << endl;
1243 m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
1245 rabund = new RAbundVector(list->getRAbundVector());
//pick the linkage implementation by name
//NOTE(review): cluster stays NULL for any other method value and is dereferenced right below; presumably method is validated before this point - confirm
1248 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
1249 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
1250 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); }
1251 tag = cluster->getTag();
//the list file is named after the distance file's path/root name plus the cluster tag
1253 if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
1254 fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
1257 m->openOutputFile(fileroot+ tag + ".list", listFile);
1259 listFileName = fileroot+ tag + ".list";
1261 float previousDist = 0.00000;
1262 float rndPreviousDist = 0.00000;
//working copy of the cutoff that is handed to cluster->update()
//presumably update() may change it, which is what the saveCutoff != cutoff check further down reports - confirm
1268 double saveCutoff = cutoff;
//keep merging while the smallest distance in the matrix is below the cutoff and the matrix still has nodes
1270 while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
1272 if (m->control_pressed) { //clean up
1273 delete matrix; delete list; delete cluster; delete rabund;
1275 m->mothurRemove(listFileName);
1276 return listFileName;
1279 cluster->update(saveCutoff);
1281 float dist = matrix->getSmallDist();
//the distance is either rounded up or rounded to precision; the condition choosing between the two is on a line not shown in this view
1284 rndDist = m->ceilDist(dist, precision);
1286 rndDist = m->roundDist(dist, precision);
//the first time the distance moves away from zero, the previous state is written as "unique"
1289 if(previousDist <= 0.0000 && dist != previousDist){
1290 oldList.setLabel("unique");
1291 oldList.print(listFile);
1292 if (labels.count("unique") == 0) { labels.insert("unique"); }
//otherwise the previous state is written whenever the rounded distance changes, labelled with the previous rounded distance
1294 else if(rndDist != rndPreviousDist){
1295 oldList.setLabel(toString(rndPreviousDist, length-1));
1296 oldList.print(listFile);
1297 if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
1300 previousDist = dist;
1301 rndPreviousDist = rndDist;
//after the loop: write the state that was still pending
1306 if(previousDist <= 0.0000){
1307 oldList.setLabel("unique");
1308 oldList.print(listFile);
1309 if (labels.count("unique") == 0) { labels.insert("unique"); }
1311 else if(rndPreviousDist<cutoff){
1312 oldList.setLabel(toString(rndPreviousDist, length-1));
1313 oldList.print(listFile);
1314 if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
//clustering is done, release everything this function owns
1317 delete matrix; delete list; delete cluster; delete rabund;
1318 matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
1321 if (m->control_pressed) { //clean up
1322 m->mothurRemove(listFileName);
1323 return listFileName;
//the split distance and name files are no longer needed once their list file exists
1326 m->mothurRemove(thisDistFile);
1327 m->mothurRemove(thisNamefile);
//the working cutoff differs from the requested one: round it like the labels and tell the user
1329 if (saveCutoff != cutoff) {
1330 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
1331 else { saveCutoff = m->roundDist(saveCutoff, precision); }
1333 m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();
//report the smallest cutoff back to the caller
1336 if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; }
1338 return listFileName;
1341 catch(exception& e) {
1342 m->errorOut(e, "ClusterSplitCommand", "clusterFile");
1346 //**********************************************************************************************************************
//Appends the distance file of every split to one merged file and registers it as a "column" output.
//  distNames - one map per split; the key of the first entry is the split's distance file.
//The merged file is named after fastafile and placed in outputDir, or next to fastafile
//when outputDir is empty.
//Returns 0 when the run is interrupted; the normal return statement is on a line not shown in this view.
1348 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
1353 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1358 string thisOutputDir = outputDir;
1359 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
1360 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column");
//remove any file of that name first, because the loop below only appends
1361 m->mothurRemove(outputFileName);
1364 for (int i = 0; i < distNames.size(); i++) {
1365 if (m->control_pressed) { return 0; }
1367 string thisDistFile = distNames[i].begin()->first;
1369 m->appendFiles(thisDistFile, outputFileName);
1372 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
1382 catch(exception& e) {
1383 m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
1387 //**********************************************************************************************************************