2 * clustersplitcommand.cpp
5 * Created by westcott on 5/19/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "clustersplitcommand.h"
11 #include "readcluster.h"
12 #include "splitmatrix.h"
13 #include "readphylip.h"
14 #include "readcolumn.h"
15 #include "readmatrix.hpp"
16 #include "inputdata.h"
18 //**********************************************************************************************************************
19 //Constructor: parses the option string for cluster.split, checks every parameter name, resolves the input files and fills in defaults. It sets abort = true when help is requested or when a check below fails; no clustering is done here.
// NOTE(review): the embedded line numbers in this listing skip values (e.g. 21, 23-25, 43-46, 181-183), so some statements - including the try that pairs with the catch at the end - are not visible here.
20 ClusterSplitCommand::ClusterSplitCommand(string option) {
22 globaldata = GlobalData::getInstance();
26 //allow user to run help
27 if(option == "help") { help(); abort = true; }
30 //valid parameters for this command
31 string Array[] = {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
32 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
34 OptionParser parser(option);
35 map<string,string> parameters = parser.getParameters();
37 ValidParameters validParameter;
39 //check to make sure all parameters are valid for command
40 map<string,string>::iterator it;
41 for (it = parameters.begin(); it != parameters.end(); it++) {
42 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
47 globaldata->newRead();
49 //if the user changes the output directory command factory will send this info to us in the output parameter
50 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
52 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
53 string inputDir = validParameter.validFile(parameters, "inputdir", false);
54 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter below: when the user supplied a value without a path, inputDir is prepended to it.
57 it = parameters.find("phylip");
58 //user has given a phylip file
59 if(it != parameters.end()){
60 path = hasPath(it->second);
61 //if the user has not given a path then, add inputdir. else leave path alone.
62 if (path == "") { parameters["phylip"] = inputDir + it->second; }
65 it = parameters.find("column");
66 //user has given a column file
67 if(it != parameters.end()){
68 path = hasPath(it->second);
69 //if the user has not given a path then, add inputdir. else leave path alone.
70 if (path == "") { parameters["column"] = inputDir + it->second; }
73 it = parameters.find("name");
74 //user has given a name file
75 if(it != parameters.end()){
76 path = hasPath(it->second);
77 //if the user has not given a path then, add inputdir. else leave path alone.
78 if (path == "") { parameters["name"] = inputDir + it->second; }
81 it = parameters.find("taxonomy");
82 //user has given a taxonomy file
83 if(it != parameters.end()){
84 path = hasPath(it->second);
85 //if the user has not given a path then, add inputdir. else leave path alone.
86 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
89 it = parameters.find("fasta");
90 //user has given a fasta file
91 if(it != parameters.end()){
92 path = hasPath(it->second);
93 //if the user has not given a path then, add inputdir. else leave path alone.
94 if (path == "") { parameters["fasta"] = inputDir + it->second; }
98 //check for required parameters
// validFile results are compared against two sentinel strings: "not open" aborts, "not found" means the parameter was not supplied.
99 phylipfile = validParameter.validFile(parameters, "phylip", true);
100 if (phylipfile == "not open") { abort = true; }
101 else if (phylipfile == "not found") { phylipfile = ""; }
102 else { distfile = phylipfile; format = "phylip"; }
104 columnfile = validParameter.validFile(parameters, "column", true);
105 if (columnfile == "not open") { abort = true; }
106 else if (columnfile == "not found") { columnfile = ""; }
107 else { distfile = columnfile; format = "column"; }
109 namefile = validParameter.validFile(parameters, "name", true);
110 if (namefile == "not open") { abort = true; }
111 else if (namefile == "not found") { namefile = ""; }
113 fastafile = validParameter.validFile(parameters, "fasta", true);
114 if (fastafile == "not open") { abort = true; }
115 else if (fastafile == "not found") { fastafile = ""; }
// supplying a fasta file forces splitmethod to "fasta" and makes the fasta file the distfile
116 else { distfile = fastafile; splitmethod = "fasta"; }
118 taxFile = validParameter.validFile(parameters, "taxonomy", true);
119 if (taxFile == "not open") { abort = true; }
120 else if (taxFile == "not found") { taxFile = ""; }
122 if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column or fastafile."); m->mothurOutEndLine(); abort = true; }
// NOTE(review): this condition is only true when all three of phylip, column and fasta are given; supplying exactly two of them is not rejected by this check.
123 else if ((phylipfile != "") && (columnfile != "") && (fastafile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: fasta, phylip or column."); m->mothurOutEndLine(); abort = true; }
125 if (columnfile != "") {
126 if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
129 if (fastafile != "") {
130 if (taxFile == "") { m->mothurOut("You need to provide a taxonomy file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
131 if (namefile == "") { m->mothurOut("You need to provide a names file if you are using a fasta file to generate the split."); m->mothurOutEndLine(); abort = true; }
134 //check for optional parameter and set defaults
135 // ...at some point should added some additional type checking...
136 //get user cutoff and precision or use defaults
138 temp = validParameter.validFile(parameters, "precision", false);
139 if (temp == "not found") { temp = "100"; }
140 //saves precision length for formatting below (used as length-1 when labels are turned into strings)
141 length = temp.length();
142 convert(temp, precision);
// NOTE(review): the statement that consumes the "hard" value is not visible in this listing (embedded lines 145-146 are missing).
144 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
147 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
148 large = isTrue(temp);
150 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
151 convert(temp, processors);
// the splitmethod parameter is only honoured when a fasta file has not already forced splitmethod to "fasta"
153 temp = validParameter.validFile(parameters, "splitmethod", false);
154 if (splitmethod != "fasta") {
155 if (temp == "not found") { splitmethod = "distance"; }
156 else { splitmethod = temp; }
159 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
160 convert(temp, cutoff);
// widens the cutoff by 5/(precision*10), e.g. 0.005 when precision is 100
161 cutoff += (5 / (precision * 10.0));
163 temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "1"; }
164 convert(temp, taxLevelCutoff);
166 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "furthest"; }
168 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
169 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
171 if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
172 else { m->mothurOut(splitmethod + " is not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
174 if ((splitmethod == "classify") && (taxFile == "")) { m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true; }
// showabund and timing are kept as strings ("T"/"F" defaults)
176 showabund = validParameter.validFile(parameters, "showabund", false);
177 if (showabund == "not found") { showabund = "T"; }
179 timing = validParameter.validFile(parameters, "timing", false);
180 if (timing == "not found") { timing = "F"; }
184 catch(exception& e) {
185 m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
190 //**********************************************************************************************************************
//Prints the usage text for the cluster.split command, one m->mothurOut call per line of help.
// NOTE(review): the try that pairs with the catch below is not visible in this listing (embedded line 193 is missing).
192 void ClusterSplitCommand::help(){
194 m->mothurOut("The cluster.split command parameter options are fasta, phylip, column, name, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, processors. Fasta or Phylip or column and name are required.\n");
195 m->mothurOut("The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n");
196 m->mothurOut("For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n");
197 m->mothurOut("For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify. \n");
198 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and split the distance file based on those groups. \n");
199 m->mothurOut("For the classification method using a fasta file, you need to provide your fasta file, names file and taxonomy file. \n");
200 m->mothurOut("You will also need to set the taxlevel you want to split by. mothur will split the sequence into distinct taxonomy groups, and create distance files for each grouping. \n");
201 m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
202 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file. \n");
203 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
204 m->mothurOut("The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n");
205 m->mothurOut("The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n");
206 m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
207 m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
208 m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
209 m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1. \n");
210 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n");
211 m->mothurOut("The cluster.split command should be in the following format: \n");
212 m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision, splitmethod=yourSplitmethod, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n");
213 m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify, taxonomy=abrecovery.silva.slv.taxonomy, taxlevel=5) \n");
216 catch(exception& e) {
217 m->errorOut(e, "ClusterSplitCommand", "help");
222 //**********************************************************************************************************************
//Destructor: the body is empty, so nothing is released explicitly here.
224 ClusterSplitCommand::~ClusterSplitCommand(){}
226 //**********************************************************************************************************************
//Runs the command in four stages: (1) convert a phylip distance file to column format, (2) split the data into groups with SplitMatrix, (3) cluster every group, (4) merge the per-group list files and report the output file names.
//Returns 0 when abort is set, when m->control_pressed is seen, or when the split method is not recognised.
// NOTE(review): the embedded line numbers skip values throughout this function, so declarations (e.g. estart, split, labels, in, tag, tempName), the try, the #else/#endif lines and the final return are not visible here.
228 int ClusterSplitCommand::execute(){
231 if (abort == true) { return 0; }
233 //****************** file prep work ******************************//
235 //if user gave a phylip file convert to column file
236 if (format == "phylip") {
238 m->mothurOut("Converting to column format..."); m->mothurOutEndLine();
// note: this local pointer is named convert, the same name as the convert() helper used in the constructor
240 ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
242 NameAssignment* nameMap = NULL;
243 convert->setFormat("phylip");
244 convert->read(nameMap);
246 if (m->control_pressed) { delete convert; return 0; }
// from here on distfile refers to the converted file reported by the reader
248 distfile = convert->getOutputFile();
250 //if no names file given with phylip file, create it
251 ListVector* listToMakeNameFile = convert->getListVector();
252 if (namefile == "") { //you need to make a namefile for split matrix
254 namefile = phylipfile + ".names";
255 openOutputFile(namefile, out);
// each bin string is written twice, tab separated, one bin per line
256 for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
257 string bin = listToMakeNameFile->get(i);
258 out << bin << '\t' << bin << endl;
262 delete listToMakeNameFile;
265 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to convert the distance file."); m->mothurOutEndLine();
267 if (m->control_pressed) { return 0; }
270 m->mothurOut("Splitting the file..."); m->mothurOutEndLine();
272 //split matrix into non-overlapping groups
// "distance" splits using cutoff; "classify" and "fasta" split using taxLevelCutoff; "fasta" is given the fasta file instead of the distance file
274 if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); }
275 else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); }
276 else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors, outputDir); }
277 else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; }
281 if (m->control_pressed) { delete split; return 0; }
283 string singletonName = split->getSingletonNames();
284 vector< map<string, string> > distName = split->getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
287 if (m->control_pressed) { return 0; }
289 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
292 //****************** break up files between processes and cluster each file set ******************************//
293 vector<string> listFileNames;
295 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// NOTE(review): the condition choosing between this direct call and the multi-process path below is not visible in this listing (embedded lines 296 and 298 are missing).
297 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
299 vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
300 dividedNames.resize(processors);
302 //for each file group figure out which process will complete it
303 //want to divide the load intelligently so the big files are spread between processes
// round-robin: file i ends up in dividedNames[i % processors]
305 for (int i = 0; i < distName.size(); i++) {
306 int processToAssign = (i+1) % processors;
307 if (processToAssign == 0) { processToAssign = processors; }
309 dividedNames[(processToAssign-1)].push_back(distName[i]);
312 //now reverse the order of some of the per-process lists, so we balance big files running with little ones
// NOTE(review): (i+1) % processors is non-zero for every i except the last, so this reverses every list except the last one rather than every other list - confirm the intent.
313 for (int i = 0; i < processors; i++) {
314 int remainder = ((i+1) % processors);
315 if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); }
318 createProcesses(dividedNames);
320 if (m->control_pressed) { return 0; }
322 //get list of list file names from each process
// each process is expected to have left "<pid>.temp" (list file names) and "<pid>.temp.labels" (labels); both are read and then removed
323 for(int i=0;i<processors;i++){
324 string filename = toString(processIDS[i]) + ".temp";
326 openInputFile(filename, in);
328 in >> tag; gobble(in);
332 in >> tempName; gobble(in);
333 listFileNames.push_back(tempName);
336 remove((toString(processIDS[i]) + ".temp").c_str());
339 filename = toString(processIDS[i]) + ".temp.labels";
341 openInputFile(filename, in2);
345 in2 >> tempName; gobble(in2);
346 if (labels.count(tempName) == 0) { labels.insert(tempName); }
349 remove((toString(processIDS[i]) + ".temp.labels").c_str());
353 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
// on interrupt, every list file produced so far is deleted before returning
356 if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
358 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
360 //****************** merge list file and create rabund and sabund files ******************************//
362 m->mothurOut("Merging the clustered files..."); m->mothurOutEndLine();
// listSingle is declared without an initial value; completeListFile takes it by reference and assigns either a new ListVector or NULL
364 ListVector* listSingle;
365 map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
367 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
369 mergeLists(listFileNames, labelBins, listSingle);
371 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
373 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
375 m->mothurOutEndLine();
376 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
377 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
378 m->mothurOutEndLine();
382 catch(exception& e) {
383 m->errorOut(e, "ClusterSplitCommand", "execute");
387 //**********************************************************************************************************************
//Prepares the per-group list files for merging.
//  listNames  - names of the list files to process (taken by value)
//  singleton  - name of the singleton file, or "none"
//  userLabels - labels seen while clustering; "unique" is mapped to -1, other labels are converted to float
//  listSingle - output: set to a new ListVector holding the second column of the singleton file, or to NULL when singleton is "none"
//Each list file is rewritten through "<name>filledInTemp" and then renamed over the original. labelBin accumulates a bin count per label, starting from the number of singleton bins.
// NOTE(review): the embedded line numbers skip values, so the try, several declarations (in, temp, numSingleBins, thisLabel, labelFloat, outFilled), the loop/if headers around some statements and the return statement are not visible; presumably labelBin is what is returned - confirm.
388 map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string>& userLabels, ListVector*& listSingle){
391 map<float, int> labelBin;
392 vector<float> orderFloat;
396 if (singleton != "none") {
398 openInputFile(singleton, in);
400 string firstCol, secondCol;
401 listSingle = new ListVector();
// reads a two-column row and keeps only the second column as a bin
403 in >> firstCol >> secondCol; gobble(in);
404 listSingle->push_back(secondCol);
// the singleton file is deleted once it has been read
407 remove(singleton.c_str());
409 numSingleBins = listSingle->getNumBins();
410 }else{ listSingle = NULL; numSingleBins = 0; }
412 //go through users set and make them floats so we can sort them
413 for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
// NOTE(review): a label that is neither "unique" nor convertible is not assigned by either branch below; what temp holds in that case depends on lines not visible here.
416 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) { convert(*it, temp); }
417 else if (*it == "unique") { temp = -1.0; }
419 orderFloat.push_back(temp);
420 labelBin[temp] = numSingleBins; //initialize numbins
424 sort(orderFloat.begin(), orderFloat.end());
427 //get the list info from each file
428 for (int k = 0; k < listNames.size(); k++) {
// on interrupt: free listSingle and delete every list file
430 if (m->control_pressed) {
431 if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str()); }
432 for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); }
436 InputData* input = new InputData(listNames[k], "list");
437 ListVector* list = input->getListVector();
438 string lastLabel = list->getLabel();
440 string filledInList = listNames[k] + "filledInTemp";
442 openOutputFile(filledInList, outFilled);
444 //for each label needed
445 for(int l = 0; l < orderFloat.size(); l++){
// -1 is the stand-in for the "unique" label; other labels are formatted with length-1 digits
448 if (orderFloat[l] == -1) { thisLabel = "unique"; }
449 else { thisLabel = toString(orderFloat[l], length-1); }
451 //this file has reached the end
453 list = input->getListVector(lastLabel, true);
454 }else{ //do you have the distance, or do you need to fill in
457 if (list->getLabel() == "unique") { labelFloat = -1.0; }
458 else { convert(list->getLabel(), labelFloat); }
460 //check for missing labels
461 if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
462 //if its bigger get last label, otherwise keep it
464 list = input->getListVector(lastLabel, true); //get last list vector to use, you actually want to move back in the file
466 lastLabel = list->getLabel();
// the chosen list is relabelled to the wanted label before being written to the filled-in file
470 list->setLabel(thisLabel);
471 list->print(outFilled);
474 labelBin[orderFloat[l]] += list->getNumBins();
478 list = input->getListVector();
481 if (list != NULL) { delete list; }
// replace the original list file with the filled-in version
485 remove(listNames[k].c_str());
486 rename(filledInList.c_str(), listNames[k].c_str());
491 catch(exception& e) {
492 m->errorOut(e, "ClusterSplitCommand", "completeListFile");
496 //**********************************************************************************************************************
//Merges the per-group list files into one .list file and writes matching .rabund and .sabund files.
//  listNames  - per-group list files to read; they are deleted at the end
//  userLabels - map of label (as float, -1 meaning "unique") to total bin count
//  listSingle - singleton bins to prepend for every label, or NULL; deleted at the end
//Returns 0 when interrupted via m->control_pressed; the normal return statement is not visible in this listing.
// NOTE(review): the embedded line numbers skip values, so the try, the stream declarations (outSabund, outRabund, outList), thisLabel and some cleanup/closing statements are not visible here.
497 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
// when no output directory was given, the member outputDir is set to the path of distfile
499 if (outputDir == "") { outputDir += hasPath(distfile); }
500 fileroot = outputDir + getRootName(getSimpleName(distfile));
502 openOutputFile(fileroot+ tag + ".sabund", outSabund);
503 openOutputFile(fileroot+ tag + ".rabund", outRabund);
504 openOutputFile(fileroot+ tag + ".list", outList);
506 outputNames.push_back(fileroot+ tag + ".sabund");
507 outputNames.push_back(fileroot+ tag + ".rabund");
508 outputNames.push_back(fileroot+ tag + ".list");
510 map<float, int>::iterator itLabel;
512 //for each label needed
513 for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
516 if (itLabel->first == -1) { thisLabel = "unique"; }
517 else { thisLabel = toString(itLabel->first, length-1); }
// a list line starts with the label and the bin count, then the tab-separated bins follow
519 outList << thisLabel << '\t' << itLabel->second << '\t';
521 RAbundVector* rabund = new RAbundVector();
522 rabund->setLabel(thisLabel);
// singleton bins are written first for every label
525 if (listSingle != NULL) {
526 for (int j = 0; j < listSingle->getNumBins(); j++) {
527 outList << listSingle->get(j) << '\t';
528 rabund->push_back(getNumNames(listSingle->get(j)));
532 //get the list info from each file
533 for (int k = 0; k < listNames.size(); k++) {
535 if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); } delete rabund; return 0; }
537 InputData* input = new InputData(listNames[k], "list");
538 ListVector* list = input->getListVector(thisLabel);
540 //this file has reached the end
// NOTE(review): whether the loop below is skipped when list is NULL is not visible in this listing (embedded line 542 is missing) - confirm list is not dereferenced in that case.
541 if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine(); }
543 for (int j = 0; j < list->getNumBins(); j++) {
544 outList << list->get(j) << '\t';
545 rabund->push_back(getNumNames(list->get(j)));
// the sabund for this label is derived from the rabund just built
552 SAbundVector sabund = rabund->getSAbundVector();
554 sabund.print(outSabund);
555 rabund->print(outRabund);
565 if (listSingle != NULL) { delete listSingle; }
567 for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); }
571 catch(exception& e) {
572 m->errorOut(e, "ClusterSplitCommand", "mergeLists");
577 //**********************************************************************************************************************
//Writes one list vector and its derived abundance data: the rabund goes to outRabund, the sabund to outSabund and the list itself to outList. When showabund is true the sabund is also printed to cout.
// NOTE(review): the try that pairs with the catch below is not visible in this listing (embedded line 580 is missing).
579 void ClusterSplitCommand::printData(ListVector* oldList){
581 string label = oldList->getLabel();
582 RAbundVector oldRAbund = oldList->getRAbundVector();
584 oldRAbund.setLabel(label);
585 if (isTrue(showabund)) {
586 oldRAbund.getSAbundVector().print(cout);
588 oldRAbund.print(outRabund);
589 oldRAbund.getSAbundVector().print(outSabund);
591 oldList->print(outList);
593 catch(exception& e) {
594 m->errorOut(e, "ClusterSplitCommand", "printData");
598 //**********************************************************************************************************************
//Spreads the clustering work over `processors` processes on the platforms named in the #if. Each worker clusters dividedNames[process] and leaves two files named after its own pid: "<pid>.temp" with the list file names and "<pid>.temp.labels" with the labels.
// NOTE(review): the embedded line numbers skip values, so the call that creates the processes (presumably fork()), the branch headers, the wait call, the declarations of process/pid/out/outLabels/labels and the return statement are not visible here - confirm against the full file.
599 int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
602 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
607 //loop through and create all the processes you want
608 while (process != processors) {
612 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
616 vector<string> listFileNames = cluster(dividedNames[process], labels);
618 //write out names to file
619 string filename = toString(getpid()) + ".temp";
621 openOutputFile(filename, out);
623 for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl; }
// the labels collected by cluster() are written one per line
628 filename = toString(getpid()) + ".temp.labels";
629 openOutputFile(filename, outLabels);
631 for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
632 outLabels << (*it) << endl;
// NOTE(review): the failure branch terminates with exit(0), i.e. an exit status of zero - confirm this is intended.
637 }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
640 //force parent to wait until all the processes are done
641 for (int i=0;i<processors;i++) {
642 int temp = processIDS[i];
650 catch(exception& e) {
651 m->errorOut(e, "ClusterSplitCommand", "createProcesses");
655 //**********************************************************************************************************************
//Clusters every distance/name file pair in distNames and writes one list file per pair.
//  distNames - one map per group; only the first entry is used (key = distance file, value = name file)
//  labels    - in/out set that receives every label written to a list file ("unique" or a formatted distance)
//Returns the names of the list files written. On m->control_pressed inside the clustering loop or after a group finishes, the list files written so far are deleted and an empty vector is returned.
//The distance and name files of each group are deleted once the group has been clustered.
// NOTE(review): the embedded line numbers skip values, so the try, several declarations (list, oldList, cluster, listFile, rndDist, fileroot), the call that reads the matrix, and the statements that update previousDist and oldList are not visible here.
657 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
660 SparseMatrix* matrix;
663 RAbundVector* rabund;
665 vector<string> listFileNames;
667 //cluster each distance file
668 for (int i = 0; i < distNames.size(); i++) {
670 string thisNamefile = distNames[i].begin()->second;
671 string thisDistFile = distNames[i].begin()->first;
673 //read in distance file
674 globaldata->setNameFile(thisNamefile);
675 globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
677 m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
679 ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
680 read->setCutoff(cutoff);
682 NameAssignment* nameMap = new NameAssignment(thisNamefile);
686 if (m->control_pressed) { delete read; delete nameMap; return listFileNames; }
688 list = read->getListVector();
690 matrix = read->getMatrix();
695 m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
697 rabund = new RAbundVector(list->getRAbundVector());
// method was restricted to furthest/nearest/average by the constructor; no other value assigns cluster here
700 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
701 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
702 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); }
703 tag = cluster->getTag();
// when no output directory was given, the member outputDir is set to the path of this distance file
705 if (outputDir == "") { outputDir += hasPath(thisDistFile); }
706 fileroot = outputDir + getRootName(getSimpleName(thisDistFile));
709 openOutputFile(fileroot+ tag + ".list", listFile);
711 listFileNames.push_back(fileroot+ tag + ".list");
713 time_t estart = time(NULL);
715 float previousDist = 0.00000;
716 float rndPreviousDist = 0.00000;
// NOTE(review): saveCutoff is stored here; where it is used is not visible in this listing.
722 double saveCutoff = cutoff;
// keep merging while the smallest remaining distance is below the cutoff and nodes remain
724 while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
726 if (m->control_pressed) { //clean up
727 delete matrix; delete list; delete cluster; delete rabund;
// note: this loop index i shadows the outer loop's i
729 for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); }
730 listFileNames.clear(); return listFileNames;
733 cluster->update(cutoff);
735 float dist = matrix->getSmallDist();
// NOTE(review): the condition choosing between ceilDist and roundDist is not visible in this listing (embedded lines 736-737 and 739 are missing); presumably it is the "hard" option - confirm.
738 rndDist = ceilDist(dist, precision);
740 rndDist = roundDist(dist, precision);
// the previous list is written under "unique" while previousDist is still <= 0, otherwise under the previous rounded distance whenever the rounded distance changes
743 if(previousDist <= 0.0000 && dist != previousDist){
744 oldList.setLabel("unique");
745 oldList.print(listFile);
746 if (labels.count("unique") == 0) { labels.insert("unique"); }
748 else if(rndDist != rndPreviousDist){
749 oldList.setLabel(toString(rndPreviousDist, length-1));
750 oldList.print(listFile);
751 if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
755 rndPreviousDist = rndDist;
// after the loop: write the last pending list, as "unique" or under the last rounded distance if that is below the cutoff
760 if(previousDist <= 0.0000){
761 oldList.setLabel("unique");
762 oldList.print(listFile);
763 if (labels.count("unique") == 0) { labels.insert("unique"); }
765 else if(rndPreviousDist<cutoff){
766 oldList.setLabel(toString(rndPreviousDist, length-1));
767 oldList.print(listFile);
768 if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
771 delete matrix; delete list; delete cluster; delete rabund;
774 if (m->control_pressed) { //clean up
775 for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); }
776 listFileNames.clear(); return listFileNames;
// the group's temporary distance and name files are no longer needed
779 remove(thisDistFile.c_str());
780 remove(thisNamefile.c_str());
784 return listFileNames;
787 catch(exception& e) {
788 m->errorOut(e, "ClusterSplitCommand", "cluster");
795 //**********************************************************************************************************************