5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
// Declares every parameter the mgcluster command accepts, appending each one to
// `parameters`, and collects the parameter names into a vector<string>.
// NOTE(review): the 4th CommandParameter argument looks like the default value
// (5, 0.10, 0.70, 100 and average match the defaults quoted in getHelpString) -
// confirm against the CommandParameter constructor.
13 vector<string> MGClusterCommand::setParameters(){
// Input files: the blast file and the optional names file.
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
// Numeric options.
17 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
// Clustering method; the third argument lists the choices furthest, nearest and average.
21 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
// Boolean switches.
22 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
// Input/output directory overrides.
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Copy the name of every registered parameter into myArray.
// NOTE(review): presumably myArray is the return value; the return statement is not visible in this listing.
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report tagged with this class and method name (the enclosing catch is not visible in this listing).
34 m->errorOut(e, "MGClusterCommand", "setParameters");
38 //**********************************************************************************************************************
// Builds the help text for the mgcluster command by appending one sentence per statement.
// NOTE(review): the first sentence lists "hard" among the options, but no later sentence
// describes it. Several literals also contain typos (similiar, neccessary, balst,
// "specify is you want"); they are left untouched here because they are user-visible output.
39 string MGClusterCommand::getHelpString(){
41 string helpString = "";
// Overview: accepted parameters, what the command does and what it writes.
42 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
44 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
// Per-parameter descriptions and defaults.
45 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46 helpString += "The precision parameter's default value is 100. \n";
47 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
48 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
50 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
51 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
52 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
// Usage example and formatting reminder.
53 helpString += "The mgcluster command should be in the following format: \n";
54 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
// Error report tagged with this class and method name (the enclosing catch is not visible in this listing).
59 m->errorOut(e, "MGClusterCommand", "getHelpString");
63 //**********************************************************************************************************************
// Maps an output type ("list", "rabund" or "sabund") to the tag used in output file names.
// Returns the tag, or an empty string when no tag is assigned.
// inputName is not used by any of the lines visible here.
64 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
66 string outputFileName = "";
67 map<string, vector<string> >::iterator it;
69 //is this a type this command creates
70 it = outputTypes.find(type);
71 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// NOTE(review): whether the chain below is nested under an else of the check above is not
// visible in this listing (embedded line 72 is missing) - confirm against the full file.
// For all three known types the tag is the same word as the type.
73 if (type == "list") { outputFileName = "list"; }
74 else if (type == "rabund") { outputFileName = "rabund"; }
75 else if (type == "sabund") { outputFileName = "sabund"; }
// Unknown tag: report it and raise the control_pressed flag on m.
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
78 return outputFileName;
// Error report tagged with this class and method name (the enclosing catch is not visible in this listing).
81 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
85 //**********************************************************************************************************************
// Default constructor: builds a command that is marked as aborted/help-only and only
// registers the output types this command can produce.
86 MGClusterCommand::MGClusterCommand(){
// With both flags set, execute() returns 0 immediately.
88 abort = true; calledHelp = true;
// Register each output type with an empty list of file names.
90 vector<string> tempOutNames;
91 outputTypes["list"] = tempOutNames;
92 outputTypes["rabund"] = tempOutNames;
93 outputTypes["sabund"] = tempOutNames;
// Error report tagged with this class and method name (the enclosing catch is not visible in this listing).
96 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
100 //**********************************************************************************************************************
// Constructs the command from a user option string: validates the parameter names,
// resolves the input files and reads every option into the corresponding member.
// "help" and "citation" print their text and set abort/calledHelp so execute() returns 0.
// An invalid parameter, an unopenable file, a missing blast file or an unknown method sets abort = true.
101 MGClusterCommand::MGClusterCommand(string option) {
103 abort = false; calledHelp = false;
105 //allow user to run help
106 if(option == "help") { help(); abort = true; calledHelp = true; }
107 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts.
110 vector<string> myArray = setParameters();
// Parse the option string into name -> value pairs.
// NOTE(review): this local map is a different object from the `parameters` container
// that setParameters() appends to; it hides that name for the rest of this function.
112 OptionParser parser(option);
113 map<string, string> parameters = parser.getParameters();
115 ValidParameters validParameter;
116 map<string,string>::iterator it;
118 //check to make sure all parameters are valid for command
119 for (it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //initialize outputTypes
124 vector<string> tempOutNames;
125 outputTypes["list"] = tempOutNames;
126 outputTypes["rabund"] = tempOutNames;
127 outputTypes["sabund"] = tempOutNames;
129 //if the user changes the input directory command factory will send this info to us in the output parameter
130 string inputDir = validParameter.validFile(parameters, "inputdir", false);
131 if (inputDir == "not found"){ inputDir = ""; }
134 it = parameters.find("blast");
135 //user has given a blast file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["blast"] = inputDir + it->second; }
142 it = parameters.find("name");
143 //user has given a name file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["name"] = inputDir + it->second; }
152 //check for required parameters
// "not open" aborts; "not found" is only turned into an empty name here and is reported further down.
153 blastfile = validParameter.validFile(parameters, "blast", true);
154 if (blastfile == "not open") { blastfile = ""; abort = true; }
155 else if (blastfile == "not found") { blastfile = ""; }
157 //if the user changes the output directory command factory will send this info to us in the output parameter
158 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
160 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
// Optional names file; when it opens it also becomes mothur's current name file.
// Unlike blastfile, namefile is not reset to "" in the "not open" case.
163 namefile = validParameter.validFile(parameters, "name", true);
164 if (namefile == "not open") { abort = true; }
165 else if (namefile == "not found") { namefile = ""; }
166 else { m->setNameFile(namefile); }
// The blast file is mandatory.
168 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
170 //check for optional parameter and set defaults
// precisionLength is the number of characters in the precision string;
// execute() passes precisionLength-1 to toString when it builds distance labels.
172 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
173 precisionLength = temp.length();
174 m->mothurConvert(temp, precision);
// The cutoff is padded by 5/(precision*10), i.e. 0.5/precision (0.005 at the default precision of 100).
// NOTE(review): presumably so that distances which round to the cutoff are still clustered - confirm.
176 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
177 m->mothurConvert(temp, cutoff);
178 cutoff += (5 / (precision * 10.0));
// Clustering method defaults to average; anything other than the three known names aborts.
180 method = validParameter.validFile(parameters, "method", false);
181 if (method == "not found") { method = "average"; }
183 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
184 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
// Remaining options, each falling back to its default when absent.
186 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
187 m->mothurConvert(temp, length);
189 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
190 m->mothurConvert(temp, penalty);
192 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
193 minWanted = m->isTrue(temp);
195 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
196 merge = m->isTrue(temp);
198 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
199 hclusterWanted = m->isTrue(temp);
201 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
202 hard = m->isTrue(temp);
// Any exception is reported tagged with this class and method name.
206 catch(exception& e) {
207 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
211 //**********************************************************************************************************************
// Runs the clustering and writes the .list, .rabund and .sabund output files.
// Two paths exist: an in-memory path using a SparseMatrix and one of the Cluster classes,
// and (when hclusterWanted is set) a file-based path using HCluster.
// Returns 0 when help was requested and 2 when the command was otherwise aborted; other
// return statements are not visible in this listing.
// Every m->control_pressed check below frees what was allocated so far, closes the three
// output streams and removes the partially written output files.
212 int MGClusterCommand::execute(){
// Nothing to do if the constructor flagged an abort.
215 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Name map: built from the names file when one was given, otherwise default-constructed.
218 if (namefile != "") {
219 nameMap = new NameAssignment(namefile);
221 }else{ nameMap= new NameAssignment(); }
// Output files are named after the blast file's root and placed in outputDir.
223 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
// Last distance handled, raw and rounded; a change in the rounded value starts a new label.
226 float previousDist = 0.00000;
227 float rndPreviousDist = 0.00000;
229 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
230 //must remember to delete those objects here since readBlast does not
231 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
// Working list and rabund vectors seeded from the name map.
234 list = new ListVector(nameMap->getListVector());
235 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
// Interrupted before any output file exists: clear the output types and free everything.
237 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
// Sequence name -> bin lookups: the current state and the state at the previous label.
241 map<string, int> Seq2Bin;
242 map<string, int> oldSeq2Bin;
// Method abbreviation used in the output file names.
// NOTE(review): only furthest and nearest are visible here; the average case is not shown in this listing.
244 if (method == "furthest") { tag = "fn"; }
245 else if (method == "nearest") { tag = "nn"; }
// Output names have the form <fileroot><tag>.<type>.
248 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
249 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
250 string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
252 m->openOutputFile(sabundFileName, sabundFile);
253 m->openOutputFile(rabundFileName, rabundFile);
254 m->openOutputFile(listFileName, listFile);
// The cleanup paths rebuild the file names with literal extensions; these match the names
// above because getOutputFileNameTag returns "list", "rabund" and "sabund" for those types.
256 if (m->control_pressed) {
257 delete nameMap; delete read; delete list; delete rabund;
258 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Remember the cutoff as requested; in the hcluster path `cutoff` is reassigned from hcluster->update().
263 double saveCutoff = cutoff;
// ---- In-memory clustering path ----
265 if (!hclusterWanted) {
266 //get distmatrix and overlap
267 SparseMatrix* distMatrix = read->getDistMatrix();
268 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
// Pick the linkage implementation that matches the requested method.
272 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
273 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
274 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
// Ask the cluster object to maintain the sequence -> bin map and take the initial snapshot.
275 cluster->setMapWanted(true);
276 Seq2Bin = cluster->getSeqtoBin();
277 oldSeq2Bin = Seq2Bin;
279 if (m->control_pressed) {
280 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
281 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
286 //cluster using cluster classes
// Keep merging while the smallest remaining distance is below the cutoff and nodes remain.
287 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
289 cluster->update(cutoff);
291 if (m->control_pressed) {
292 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
293 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
298 float dist = distMatrix->getSmallDist();
// Round the distance to `precision` with ceilDist or roundDist.
// NOTE(review): the condition choosing between the two is not visible here; at the end of
// this function the same pair is selected by `hard` - confirm.
301 rndDist = m->ceilDist(dist, precision);
303 rndDist = m->roundDist(dist, precision);
// First time the distance moves off zero: the saved list is labelled "unique".
306 if(previousDist <= 0.0000 && dist != previousDist){
307 oldList.setLabel("unique");
// Rounded distance changed: build the overlap-merged list for the previous label.
// The list returned by mergeOPFs is owned here (it is deleted on the abort path).
310 else if(rndDist != rndPreviousDist){
312 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
314 if (m->control_pressed) {
315 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
316 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Labels are the previous rounded distance formatted with precisionLength-1 as toString's second argument.
321 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
325 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
// Carry the state forward to the next iteration.
331 rndPreviousDist = rndDist;
333 Seq2Bin = cluster->getSeqtoBin();
334 oldSeq2Bin = Seq2Bin;
// After the loop: handle the last pending label the same way as inside the loop.
337 if(previousDist <= 0.0000){
338 oldList.setLabel("unique");
341 else if(rndPreviousDist<cutoff){
343 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
345 if (m->control_pressed) {
346 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
347 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
352 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
356 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
// The in-memory overlap data is no longer needed.
362 overlapMatrix.clear();
// ---- File-based clustering path ----
366 }else { //use hcluster to cluster
367 //get distmatrix and overlap
368 overlapFile = read->getOverlapFile();
369 distFile = read->getDistFile();
372 //sort the distance and overlap files
// sortHclusterFiles replaces distFile/overlapFile with the sorted files and removes the unsorted ones.
373 sortHclusterFiles(distFile, overlapFile);
// NOTE(review): unlike the later abort paths in this branch, this one does not remove
// distFile and overlapFile, so the sorted temporary files appear to be left behind - confirm.
375 if (m->control_pressed) {
376 delete nameMap; delete list; delete rabund;
377 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
383 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
384 hcluster->setMapWanted(true);
// NOTE(review): this reads the map from `cluster`, but in this branch only `hcluster` is
// constructed in the visible lines; `cluster` is assigned only in the in-memory branch.
// This looks like it should be hcluster->getSeqtoBin() - confirm.
385 Seq2Bin = cluster->getSeqtoBin();
386 oldSeq2Bin = Seq2Bin;
// seqs starts with one element only so that the while loop below is entered.
388 vector<seqDist> seqs; seqs.resize(1); // to start loop
389 //ifstream inHcluster;
390 //m->openInputFile(distFile, inHcluster);
392 if (m->control_pressed) {
393 delete nameMap; delete list; delete rabund; delete hcluster;
394 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Process batches of sequence pairs until hcluster returns an empty batch.
399 while (seqs.size() != 0){
401 seqs = hcluster->getSeqs();
403 //to account for cutoff change in average neighbor
404 if (seqs.size() != 0) {
405 if (seqs[0].dist > cutoff) { break; }
// From here on the abort paths also remove the temporary dist/overlap files.
408 if (m->control_pressed) {
409 delete nameMap; delete list; delete rabund; delete hcluster;
410 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
411 m->mothurRemove(distFile);
412 m->mothurRemove(overlapFile);
417 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
// Pairs of a sequence with itself are ignored.
419 if (seqs[i].seq1 != seqs[i].seq2) {
// update() returns the cutoff to use from now on, which may differ from the requested one.
421 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
423 if (m->control_pressed) {
424 delete nameMap; delete list; delete rabund; delete hcluster;
425 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
426 m->mothurRemove(distFile);
427 m->mothurRemove(overlapFile);
// Same rounding choice as in the in-memory path (selecting condition not visible here).
434 rndDist = m->ceilDist(seqs[i].dist, precision);
436 rndDist = m->roundDist(seqs[i].dist, precision);
// Same label handling as in the in-memory path.
439 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
440 oldList.setLabel("unique");
443 else if((rndDist != rndPreviousDist)){
445 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
447 if (m->control_pressed) {
448 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
449 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
450 m->mothurRemove(distFile);
451 m->mothurRemove(overlapFile);
456 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
460 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
465 previousDist = seqs[i].dist;
466 rndPreviousDist = rndDist;
// NOTE(review): same concern as above - `cluster` is used although this is the hcluster branch.
468 Seq2Bin = cluster->getSeqtoBin();
469 oldSeq2Bin = Seq2Bin;
473 //inHcluster.close();
// After the loop: handle the last pending label.
475 if(previousDist <= 0.0000){
476 oldList.setLabel("unique");
479 else if(rndPreviousDist<cutoff){
481 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
483 if (m->control_pressed) {
484 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
485 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
486 m->mothurRemove(distFile);
487 m->mothurRemove(overlapFile);
492 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
496 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
// The sorted temporary files are no longer needed.
502 m->mothurRemove(distFile);
503 m->mothurRemove(overlapFile);
// ---- Common tail for both paths ----
512 if (m->control_pressed) {
514 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Report the output files and record them by type.
519 m->mothurOutEndLine();
520 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
521 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
522 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
523 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
524 m->mothurOutEndLine();
// Tell the user when the cutoff ended up different from the one saved before clustering.
// NOTE(review): saveCutoff is rounded here but the message prints `cutoff`, and saveCutoff
// is not used again in the visible lines - confirm which value was meant to be reported.
526 if (saveCutoff != cutoff) {
527 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
528 else { saveCutoff = m->roundDist(saveCutoff, precision); }
530 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
533 //set list file as new current listfile
535 itTypes = outputTypes.find("list");
536 if (itTypes != outputTypes.end()) {
537 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
540 //set rabund file as new current rabundfile
541 itTypes = outputTypes.find("rabund");
542 if (itTypes != outputTypes.end()) {
543 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
546 //set sabund file as new current sabundfile
547 itTypes = outputTypes.find("sabund");
548 if (itTypes != outputTypes.end()) {
549 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
// Elapsed wall-clock time in seconds, measured from `start` (set on a line not visible in this listing).
553 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
// Any exception is reported tagged with this class and method name.
557 catch(exception& e) {
558 m->errorOut(e, "MGClusterCommand", "execute");
562 //**********************************************************************************************************************
// Writes one clustering result to the three open output streams: the list itself to
// listFile, its rabund vector to rabundFile and its sabund vector to sabundFile.
// mergedList is only read through the calls below; it is not deleted here.
563 void MGClusterCommand::printData(ListVector* mergedList){
565 mergedList->print(listFile);
566 mergedList->getRAbundVector().print(rabundFile);
568 SAbundVector sabund = mergedList->getSAbundVector();
571 sabund.print(sabundFile);
// Any exception is reported tagged with this class and method name.
573 catch(exception& e) {
574 m->errorOut(e, "MGClusterCommand", "printData");
578 //**********************************************************************************************************************
579 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not affect the data structures
580 //that are used to cluster by distance. this is done so that the overlapping data does not have more influence than the distance data.
// Builds a copy of oldList in which two bins are merged whenever a pair of their sequences
// has an overlap distance below `dist`.
//   binInfo - sequence name -> bin index; taken by value, so the reassignments made here
//             stay local to this call.
//   dist    - overlap entries with a distance below this value trigger a merge.
// The new ListVector is allocated here; execute() deletes the returned pointer on its
// abort paths, so the caller owns it.
// NOTE(review): the loop header, the declarations of done/count/overlapNode/inOverlap and the
// return statement are not visible in this listing; presumably newList is returned - confirm.
581 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
583 //create new listvector so you don't overwrite the clustering
584 ListVector* newList = new ListVector(oldList);
// Nothing to do when there is no overlap data: an overlap file already at eof in hcluster
// mode, or an empty in-memory overlap matrix otherwise.
590 if (hclusterWanted) {
591 m->openInputFile(overlapFile, inOverlap);
592 if (inOverlap.eof()) { done = true; }
593 }else { if (overlapMatrix.size() == 0) { done = true; } }
// On user interrupt, close the overlap file if one was opened.
596 if (m->control_pressed) {
597 if (hclusterWanted) { inOverlap.close(); }
// Fetch the next overlap entry: from the in-memory matrix ...
603 if (!hclusterWanted) {
604 if (count < overlapMatrix.size()) { //do we have another node in the matrix
605 overlapNode = overlapMatrix[count];
// ... or from the overlap file, one "name name distance" record at a time.
609 if (!inOverlap.eof()) {
610 string firstName, secondName;
611 float overlapDistance;
612 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
614 //commented out because we check this in readblast already
615 //map<string,int>::iterator itA = nameMap->find(firstName);
616 //map<string,int>::iterator itB = nameMap->find(secondName);
617 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
618 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
620 //overlapNode.seq1 = itA->second;
621 //overlapNode.seq2 = itB->second;
// Translate the sequence names into the values stored in overlapNode via nameMap.
622 overlapNode.seq1 = nameMap->get(firstName);
623 overlapNode.seq2 = nameMap->get(secondName);
624 overlapNode.dist = overlapDistance;
// End of file: close it and leave the loop.
625 }else { inOverlap.close(); break; }
// Only entries below `dist` are merged; the first entry at or above it ends the scan
// (see the else on embedded line 669), which relies on the entries being sorted by distance.
628 if (overlapNode.dist < dist) {
629 //get names of seqs that overlap
630 string name1 = nameMap->get(overlapNode.seq1);
631 string name2 = nameMap->get(overlapNode.seq2);
633 //use binInfo to find out if they are already in the same bin
634 //map<string, int>::iterator itBin1 = binInfo.find(name1);
635 //map<string, int>::iterator itBin2 = binInfo.find(name2);
637 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
638 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
640 //int binKeep = itBin1->second;
641 //int binRemove = itBin2->second;
// NOTE(review): map::operator[] inserts the key with value 0 when it is missing, so an
// unknown name is silently treated as belonging to bin 0 (the commented-out code above
// used to report that case) - confirm this is intended.
643 int binKeep = binInfo[name1];
644 int binRemove = binInfo[name2];
646 //if not merge bins and update binInfo
647 if(binKeep != binRemove) {
649 //save names in old bin
650 string names = newList->get(binRemove);
652 //merge bins into name1s bin
653 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
654 newList->set(binRemove, "");
// Walk the comma-separated names of the removed bin and point each one at binKeep.
// The comparison with -1 relies on -1 converting to the same value as string::npos.
657 while (names.find_first_of(',') != -1) {
659 string name = names.substr(0,names.find_first_of(','));
660 //save name and bin number
661 binInfo[name] = binKeep;
662 names = names.substr(names.find_first_of(',')+1, names.length());
// Whatever remains after the last comma is the final name of the removed bin.
666 binInfo[names] = binKeep;
// Overlap distance not below `dist`: stop scanning.
669 }else { done = true; }
// Any exception is reported tagged with this class and method name.
676 catch(exception& e) {
677 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
681 //**********************************************************************************************************************
// Sorts the distance and overlap files used by the hcluster path.
// For each input, m->sortFile is called with outputDir as its second argument, the unsorted
// input is removed, and the matching member (distFile / overlapFile) is pointed at the sorted file.
//   unsortedDist    - path of the unsorted distance file
//   unsortedOverlap - path of the unsorted overlap file
682 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
// Distance file.
685 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
686 m->mothurRemove(unsortedDist); //delete unsorted file
687 distFile = sortedDistFile;
// Overlap file.
690 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
691 m->mothurRemove(unsortedOverlap); //delete unsorted file
692 overlapFile = sortedOverlapFile;
// Any exception is reported tagged with this class and method name.
694 catch(exception& e) {
695 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
700 //**********************************************************************************************************************