X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=clustersplitcommand.cpp;h=b097f382024d5be9c56bd346723e398b10ef3725;hb=0cefb55a2616975bd4a144fc345693695ffc9bb6;hp=34caf654124886f9cd23638a149b2b3487ca53e2;hpb=16abd6271c455bd01b34ff89a2e3641bef0fa128;p=mothur.git diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 34caf65..b097f38 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -8,12 +8,6 @@ */ #include "clustersplitcommand.h" -#include "readcluster.h" -#include "splitmatrix.h" -#include "readphylip.h" -#include "readcolumn.h" -#include "readmatrix.hpp" -#include "inputdata.h" //********************************************************************************************************************** @@ -34,6 +28,7 @@ vector ClusterSplitCommand::setParameters(){ CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod); CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard); + CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -67,6 +62,7 @@ string ClusterSplitCommand::getHelpString(){ helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n"; helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n"; helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n"; + helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic. It is only valid with splitmethod=fasta. Default=f.\n"; #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; #endif @@ -81,6 +77,29 @@ string ClusterSplitCommand::getHelpString(){ } } //********************************************************************************************************************** +string ClusterSplitCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "list") { outputFileName = "list"; } + else if (type == "rabund") { outputFileName = "rabund"; } + else if (type == "sabund") { outputFileName = "sabund"; } + else if (type == "column") { outputFileName = "dist"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ClusterSplitCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** ClusterSplitCommand::ClusterSplitCommand(){ try { abort = true; calledHelp = true; @@ -203,7 +222,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { taxFile = validParameter.validFile(parameters, "taxonomy", true); if (taxFile == "not open") { taxFile = ""; abort = true; } else if (taxFile == "not found") { taxFile = ""; } - else { m->setTaxonomyFile(taxFile); } + else { m->setTaxonomyFile(taxFile); if (splitmethod != "fasta") { splitmethod = "classify"; } } if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { //is there are current file available for either of these? @@ -271,17 +290,22 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; } large = m->isTrue(temp); - + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); m->mothurConvert(temp, processors); temp = validParameter.validFile(parameters, "splitmethod", false); - if (splitmethod != "fasta") { + if ((splitmethod != "fasta") && (splitmethod != "classify")) { if (temp == "not found") { splitmethod = "distance"; } else { splitmethod = temp; } } + temp = validParameter.validFile(parameters, "classic", false); if (temp == "not found") { temp = "F"; } + classic = m->isTrue(temp); + + if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; } + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.25"; } m->mothurConvert(temp, cutoff); cutoff += (5 / (precision * 10.0)); @@ -291,7 +315,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "average"; } - if ((method == "furthest") || (method == "nearest") || (method == "average")) { } + if ((method == "furthest") || (method == "nearest") || (method == "average")) { m->mothurOut("Using splitmethod " + splitmethod + ".\n"); } else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; } if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { } @@ -379,7 +403,7 @@ int ClusterSplitCommand::execute(){ SplitMatrix* split; if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); } else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); } - else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir); } + else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir); } else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; } split->split(); @@ -555,74 +579,16 @@ int ClusterSplitCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); #else - + ///////////////////// WINDOWS CAN ONLY USE 1 PROCESSORS ACCESS VIOLATION UNRESOLVED /////////////////////// //sanity check if (processors > distName.size()) { processors = distName.size(); } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files }else{ - - //cout << processors << '\t' << distName.size() << endl; - vector < vector < map > > dividedNames; //distNames[1] = vector of filenames for process 1... - dividedNames.resize(processors); - - //for each file group figure out which process will complete it - //want to divide the load intelligently so the big files are spread between processes - for (int i = 0; i < distName.size(); i++) { - //cout << i << endl; - int processToAssign = (i+1) % processors; - if (processToAssign == 0) { processToAssign = processors; } - - dividedNames[(processToAssign-1)].push_back(distName[i]); - } - - //not lets reverse the order of ever other process, so we balance big files running with little ones - for (int i = 0; i < processors; i++) { - //cout << i << endl; - int remainder = ((i+1) % processors); - if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); } - } - - createProcesses(dividedNames); - - if (m->control_pressed) { return 0; } - - //get list of list file names from each process - for(int i=0;iopenInputFile(filename, in); - - in >> tag; m->gobble(in); - - while(!in.eof()) { - string tempName; - in >> tempName; m->gobble(in); - listFileNames.push_back(tempName); - } - in.close(); - m->mothurRemove((toString(processIDS[i]) + ".temp")); - - //get labels - filename = toString(processIDS[i]) + ".temp.labels"; - ifstream in2; - m->openInputFile(filename, in2); - - float tempCutoff; - in2 >> tempCutoff; m->gobble(in2); - if (tempCutoff < cutoff) { cutoff = tempCutoff; } - - while(!in2.eof()) { - string tempName; - in2 >> tempName; m->gobble(in2); - if (labels.count(tempName) == 0) { labels.insert(tempName); } - } - in2.close(); - m->mothurRemove((toString(processIDS[i]) + ".temp.labels")); - } - } + listFileNames = createProcesses(distName, labels); + } #else listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files #endif @@ -807,14 +773,17 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us if (outputDir == "") { outputDir += m->hasPath(distfile); } fileroot = outputDir + m->getRootName(m->getSimpleName(distfile)); - m->openOutputFile(fileroot+ tag + ".sabund", outSabund); - m->openOutputFile(fileroot+ tag + ".rabund", outRabund); - m->openOutputFile(fileroot+ tag + ".list", outList); - - outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["list"].push_back(fileroot+ tag + ".list"); - outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund"); - outputNames.push_back(fileroot+ tag + ".list"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund"); + string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund"); + string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund"); + string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list"); + + m->openOutputFile(sabundFileName, outSabund); + m->openOutputFile(rabundFileName, outRabund); + m->openOutputFile(listFileName, outList); + outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); + outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); + outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); map::iterator itLabel; //for each label needed @@ -904,12 +873,35 @@ void ClusterSplitCommand::printData(ListVector* oldList){ } } //********************************************************************************************************************** -int ClusterSplitCommand::createProcesses(vector < vector < map > > dividedNames){ +vector ClusterSplitCommand::createProcesses(vector< map > distName, set& labels){ try { + + vector listFiles; + vector < vector < map > > dividedNames; //distNames[1] = vector of filenames for process 1... + dividedNames.resize(processors); + + //for each file group figure out which process will complete it + //want to divide the load intelligently so the big files are spread between processes + for (int i = 0; i < distName.size(); i++) { + //cout << i << endl; + int processToAssign = (i+1) % processors; + if (processToAssign == 0) { processToAssign = processors; } + + dividedNames[(processToAssign-1)].push_back(distName[i]); + if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); } + } + + //not lets reverse the order of ever other process, so we balance big files running with little ones + for (int i = 0; i < processors; i++) { + //cout << i << endl; + int remainder = ((i+1) % processors); + if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); } + } + + if (m->control_pressed) { return listFiles; } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; - int exitCommand = 1; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + int process = 1; processIDS.clear(); //loop through and create all the processes you want @@ -950,14 +942,99 @@ int ClusterSplitCommand::createProcesses(vector < vector < map > } } + //do your part + listFiles = cluster(dividedNames[0], labels); + //force parent to wait until all the processes are done - for (int i=0;iopenInputFile(filename, in); + + in >> tag; m->gobble(in); + + while(!in.eof()) { + string tempName; + in >> tempName; m->gobble(in); + listFiles.push_back(tempName); + } + in.close(); + m->mothurRemove((toString(processIDS[i]) + ".temp")); + + //get labels + filename = toString(processIDS[i]) + ".temp.labels"; + ifstream in2; + m->openInputFile(filename, in2); + + float tempCutoff; + in2 >> tempCutoff; m->gobble(in2); + if (tempCutoff < cutoff) { cutoff = tempCutoff; } + + while(!in2.eof()) { + string tempName; + in2 >> tempName; m->gobble(in2); + if (labels.count(tempName) == 0) { labels.insert(tempName); } + } + in2.close(); + m->mothurRemove((toString(processIDS[i]) + ".temp.labels")); + } + + + #else + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the clusterData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to allow both threads to add labels. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; - return exitCommand; + //Create processor worker threads. + for( int i=1; itag; + //get listfiles created + for(int j=0; j < pDataArray[i]->listFiles.size(); j++){ listFiles.push_back(pDataArray[i]->listFiles[j]); } + //get labels + set::iterator it; + for(it = pDataArray[i]->labels.begin(); it != pDataArray[i]->labels.end(); it++){ labels.insert(*it); } + //check cutoff + if (pDataArray[i]->cutoff < cutoff) { cutoff = pDataArray[i]->cutoff; } + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + #endif + + return listFiles; } catch(exception& e) { @@ -969,153 +1046,27 @@ int ClusterSplitCommand::createProcesses(vector < vector < map > vector ClusterSplitCommand::cluster(vector< map > distNames, set& labels){ try { - Cluster* cluster; - SparseMatrix* matrix; - ListVector* list; - ListVector oldList; - RAbundVector* rabund; vector listFileNames; - double smallestCutoff = cutoff; //cluster each distance file for (int i = 0; i < distNames.size(); i++) { - if (m->control_pressed) { return listFileNames; } - + string thisNamefile = distNames[i].begin()->second; string thisDistFile = distNames[i].begin()->first; - - #ifdef USE_MPI - int pid; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - - //output your files too - if (pid != 0) { - cout << endl << "Reading " << thisDistFile << endl; - } - #endif - - m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine(); - - ReadMatrix* read = new ReadColumnMatrix(thisDistFile); - read->setCutoff(cutoff); - - NameAssignment* nameMap = new NameAssignment(thisNamefile); - nameMap->readMap(); - read->read(nameMap); - - if (m->control_pressed) { delete read; delete nameMap; return listFileNames; } - - list = read->getListVector(); - oldList = *list; - matrix = read->getMatrix(); - - delete read; - delete nameMap; - - #ifdef USE_MPI - //output your files too - if (pid != 0) { - cout << endl << "Clustering " << thisDistFile << endl; - } - #endif - - m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine(); - - rabund = new RAbundVector(list->getRAbundVector()); - - //create cluster - if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); } - else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); } - else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); } - tag = cluster->getTag(); - - if (outputDir == "") { outputDir += m->hasPath(thisDistFile); } - fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile)); - - ofstream listFile; - m->openOutputFile(fileroot+ tag + ".list", listFile); - - listFileNames.push_back(fileroot+ tag + ".list"); - - float previousDist = 0.00000; - float rndPreviousDist = 0.00000; - - oldList = *list; - - print_start = true; - start = time(NULL); - double saveCutoff = cutoff; - - while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){ - - if (m->control_pressed) { //clean up - delete matrix; delete list; delete cluster; delete rabund; - listFile.close(); - for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } - listFileNames.clear(); return listFileNames; - } - - cluster->update(saveCutoff); - - float dist = matrix->getSmallDist(); - float rndDist; - if (hard) { - rndDist = m->ceilDist(dist, precision); - }else{ - rndDist = m->roundDist(dist, precision); - } - - if(previousDist <= 0.0000 && dist != previousDist){ - oldList.setLabel("unique"); - oldList.print(listFile); - if (labels.count("unique") == 0) { labels.insert("unique"); } - } - else if(rndDist != rndPreviousDist){ - oldList.setLabel(toString(rndPreviousDist, length-1)); - oldList.print(listFile); - if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); } - } - - previousDist = dist; - rndPreviousDist = rndDist; - oldList = *list; - } + string listFileName = ""; + if (classic) { listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff); } + else { listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff); } - - if(previousDist <= 0.0000){ - oldList.setLabel("unique"); - oldList.print(listFile); - if (labels.count("unique") == 0) { labels.insert("unique"); } - } - else if(rndPreviousDistcontrol_pressed) { //clean up for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } listFileNames.clear(); return listFileNames; } - - m->mothurRemove(thisDistFile); - m->mothurRemove(thisNamefile); - - if (saveCutoff != cutoff) { - if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } - else { saveCutoff = m->roundDist(saveCutoff, precision); } - - m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine(); - } - - if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; } - } + + listFileNames.push_back(listFileName); + } cutoff = smallestCutoff; @@ -1130,6 +1081,269 @@ vector ClusterSplitCommand::cluster(vector< map > distNa } //********************************************************************************************************************** +string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set& labels, double& smallestCutoff){ + try { + string listFileName = ""; + + ListVector* list = NULL; + ListVector oldList; + RAbundVector* rabund = NULL; + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + //output your files too + if (pid != 0) { + cout << endl << "Reading " << thisDistFile << endl; + } +#endif + + m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine(); + + NameAssignment* nameMap = new NameAssignment(thisNamefile); + nameMap->readMap(); + + //reads phylip file storing data in 2D vector, also fills list and rabund + bool sim = false; + ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim); + cluster->readPhylipFile(thisDistFile, nameMap); + tag = cluster->getTag(); + + if (m->control_pressed) { delete cluster; return 0; } + + list = cluster->getListVector(); + rabund = cluster->getRAbundVector(); + + if (outputDir == "") { outputDir += m->hasPath(thisDistFile); } + fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile)); + listFileName = fileroot+ tag + ".list"; + + ofstream listFile; + m->openOutputFile(fileroot+ tag + ".list", listFile); + + float previousDist = 0.00000; + float rndPreviousDist = 0.00000; + oldList = *list; + +#ifdef USE_MPI + //output your files too + if (pid != 0) { + cout << endl << "Clustering " << thisDistFile << endl; + } +#endif + + m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine(); + + while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){ + if (m->control_pressed) { delete cluster; delete list; delete rabund; listFile.close(); return listFileName; } + + cluster->update(cutoff); + + float dist = cluster->getSmallDist(); + float rndDist; + if (hard) { + rndDist = m->ceilDist(dist, precision); + }else{ + rndDist = m->roundDist(dist, precision); + } + + if(previousDist <= 0.0000 && dist != previousDist){ + oldList.setLabel("unique"); + oldList.print(listFile); + if (labels.count("unique") == 0) { labels.insert("unique"); } + } + else if(rndDist != rndPreviousDist){ + oldList.setLabel(toString(rndPreviousDist, length-1)); + oldList.print(listFile); + if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); } + } + + + previousDist = dist; + rndPreviousDist = rndDist; + oldList = *list; + } + + if(previousDist <= 0.0000){ + oldList.setLabel("unique"); + oldList.print(listFile); + if (labels.count("unique") == 0) { labels.insert("unique"); } + } + else if(rndPreviousDisterrorOut(e, "ClusterSplitCommand", "clusterClassicFile"); + exit(1); + } +} + +//********************************************************************************************************************** +string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set& labels, double& smallestCutoff){ + try { + string listFileName = ""; + + Cluster* cluster = NULL; + SparseDistanceMatrix* matrix = NULL; + ListVector* list = NULL; + ListVector oldList; + RAbundVector* rabund = NULL; + + if (m->control_pressed) { return listFileName; } + +#ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + //output your files too + if (pid != 0) { + cout << endl << "Reading " << thisDistFile << endl; + } +#endif + + m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine(); + + ReadMatrix* read = new ReadColumnMatrix(thisDistFile); + read->setCutoff(cutoff); + + NameAssignment* nameMap = new NameAssignment(thisNamefile); + nameMap->readMap(); + read->read(nameMap); + + if (m->control_pressed) { delete read; delete nameMap; return listFileName; } + + list = read->getListVector(); + oldList = *list; + matrix = read->getDMatrix(); + + delete read; read = NULL; + delete nameMap; nameMap = NULL; + + +#ifdef USE_MPI + //output your files too + if (pid != 0) { + cout << endl << "Clustering " << thisDistFile << endl; + } +#endif + + m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine(); + + rabund = new RAbundVector(list->getRAbundVector()); + + //create cluster + if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); } + else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); } + else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); } + tag = cluster->getTag(); + + if (outputDir == "") { outputDir += m->hasPath(thisDistFile); } + fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile)); + + ofstream listFile; + m->openOutputFile(fileroot+ tag + ".list", listFile); + + listFileName = fileroot+ tag + ".list"; + + float previousDist = 0.00000; + float rndPreviousDist = 0.00000; + + oldList = *list; + + print_start = true; + start = time(NULL); + double saveCutoff = cutoff; + + while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){ + + if (m->control_pressed) { //clean up + delete matrix; delete list; delete cluster; delete rabund; + listFile.close(); + m->mothurRemove(listFileName); + return listFileName; + } + + cluster->update(saveCutoff); + + float dist = matrix->getSmallDist(); + float rndDist; + if (hard) { + rndDist = m->ceilDist(dist, precision); + }else{ + rndDist = m->roundDist(dist, precision); + } + + if(previousDist <= 0.0000 && dist != previousDist){ + oldList.setLabel("unique"); + oldList.print(listFile); + if (labels.count("unique") == 0) { labels.insert("unique"); } + } + else if(rndDist != rndPreviousDist){ + oldList.setLabel(toString(rndPreviousDist, length-1)); + oldList.print(listFile); + if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); } + } + + previousDist = dist; + rndPreviousDist = rndDist; + oldList = *list; + } + + + if(previousDist <= 0.0000){ + oldList.setLabel("unique"); + oldList.print(listFile); + if (labels.count("unique") == 0) { labels.insert("unique"); } + } + else if(rndPreviousDistcontrol_pressed) { //clean up + m->mothurRemove(listFileName); + return listFileName; + } + + m->mothurRemove(thisDistFile); + m->mothurRemove(thisNamefile); + + if (saveCutoff != cutoff) { + if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } + else { saveCutoff = m->roundDist(saveCutoff, precision); } + + m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine(); + } + + if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; } + + return listFileName; + + } + catch(exception& e) { + m->errorOut(e, "ClusterSplitCommand", "clusterFile"); + exit(1); + } +} +//********************************************************************************************************************** int ClusterSplitCommand::createMergedDistanceFile(vector< map > distNames) { try{ @@ -1143,7 +1357,7 @@ int ClusterSplitCommand::createMergedDistanceFile(vector< map > string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column"); m->mothurRemove(outputFileName);