5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
// Declares every parameter the mgcluster command accepts and collects their names.
// Each CommandParameter built below is appended to the `parameters` member; the loop at the
// end copies every entry's `name` field, in registration order, into myArray.
// NOTE(review): the try/catch scaffolding and the return of myArray are not visible in this
// excerpt; only the errorOut call of the handler is shown.
13 vector<string> MGClusterCommand::setParameters(){
// Input files (type "InputTypes"). The fourth constructor argument appears to be the default
// value (it matches the defaults quoted in getHelpString); the meaning of the remaining
// positional arguments is defined by CommandParameter, which is not shown here -- TODO confirm.
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
// Numeric options: length 5, penalty 0.10, cutoff 0.70, precision 100.
17 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
// Clustering method: the option string lists furthest, nearest and average; "average" is registered in the default slot.
21 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
// Boolean switches: hard, min and merge are registered with "T", hcluster with "F".
22 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
// Directory overrides for input and output files.
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Gather the registered parameter names.
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exception handler body: report the error under this class and function name.
34 m->errorOut(e, "MGClusterCommand", "setParameters");
38 //**********************************************************************************************************************
// Builds the help text for the mgcluster command by appending one sentence per statement.
// The text lists the accepted parameters, states their defaults and shows an example call.
// Wording fixes relative to the previous text: the example parameter label is spelled "blast"
// (it read "balst"), and several misspelled or garbled phrases were corrected so the help
// matches the parameter names registered in setParameters.
// NOTE(review): the try/catch scaffolding and the return of helpString are not visible in this
// excerpt; only the errorOut call of the handler is shown.
39 string MGClusterCommand::getHelpString(){
41 string helpString = "";
42 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similar to the OTUs.\n";
44 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with other mothur commands to estimate richness.\n";
45 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46 helpString += "The precision parameter's default value is 100. \n";
47 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
48 helpString += "The min parameter allows you to specify if you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
50 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
51 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
52 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be necessary if your file is too large to fit into RAM. The default is false.\n";
// Usage example and formatting reminder.
53 helpString += "The mgcluster command should be in the following format: \n";
54 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55 helpString += "Note: No spaces between parameter labels (i.e. blast), '=' and parameters (i.e.yourBlastfile).\n";
// Exception handler body: report the error under this class and function name.
59 m->errorOut(e, "MGClusterCommand", "getHelpString");
63 //**********************************************************************************************************************
// Returns the file-name tag used for one of this command's output types.
//   type      - output type to look up; "list", "rabund" and "sabund" each map to a tag equal
//               to their own name.
//   inputName - not referenced in the visible lines of this function.
// A type that is missing from outputTypes is reported through m->mothurOut. A type that matches
// none of the three names is reported as well and m->control_pressed is set; in the visible
// lines outputFileName is left as "" in that case.
// NOTE(review): some structural lines (try/else/closing braces) are not visible in this excerpt.
64 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
66 string outputFileName = "";
67 map<string, vector<string> >::iterator it;
69 //is this a type this command creates
70 it = outputTypes.find(type);
71 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// Translate the type into its tag.
73 if (type == "list") { outputFileName = "list"; }
74 else if (type == "rabund") { outputFileName = "rabund"; }
75 else if (type == "sabund") { outputFileName = "sabund"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
78 return outputFileName;
// Exception handler body: report the error under this class and function name.
81 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
85 //**********************************************************************************************************************
// Default constructor. Sets both abort and calledHelp to true and registers the three output
// types ("list", "rabund", "sabund") in outputTypes, each with an empty vector of file names.
// NOTE(review): the try/catch scaffolding is not visible in this excerpt.
86 MGClusterCommand::MGClusterCommand(){
88 abort = true; calledHelp = true;
// One shared empty vector is copied into each output-type slot.
90 vector<string> tempOutNames;
91 outputTypes["list"] = tempOutNames;
92 outputTypes["rabund"] = tempOutNames;
93 outputTypes["sabund"] = tempOutNames;
// Exception handler body: report the error under this class and function name.
96 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
100 //**********************************************************************************************************************
// Constructs the command from a user option string.
//   option - "help" or "citation" prints the corresponding text and marks the command as
//            aborted; any other value is parsed by OptionParser into name/value parameters.
// After validating the parameter names, the constructor resolves the blast and name files,
// the output directory, and every optional setting, falling back to the defaults shown below
// when validFile reports "not found". Any validation failure sets abort to true.
// NOTE(review): several structural lines (try, else, closing braces, the declarations of
// `path` and `temp`) are not visible in this excerpt.
101 MGClusterCommand::MGClusterCommand(string option) {
103 abort = false; calledHelp = false;
105 //allow user to run help
106 if(option == "help") { help(); abort = true; calledHelp = true; }
107 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts, used for validation below.
110 vector<string> myArray = setParameters();
112 OptionParser parser(option);
// Local map of user-supplied name/value pairs (distinct from the `parameters` member used in setParameters).
113 map<string, string> parameters = parser.getParameters();
115 ValidParameters validParameter;
116 map<string,string>::iterator it;
118 //check to make sure all parameters are valid for command
119 for (it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //initialize outputTypes
124 vector<string> tempOutNames;
125 outputTypes["list"] = tempOutNames;
126 outputTypes["rabund"] = tempOutNames;
127 outputTypes["sabund"] = tempOutNames;
129 //if the user changes the input directory command factory will send this info to us in the output parameter
130 string inputDir = validParameter.validFile(parameters, "inputdir", false);
131 if (inputDir == "not found"){ inputDir = ""; }
// Prefix inputDir to the blast file name when the user gave the name without a path.
134 it = parameters.find("blast");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["blast"] = inputDir + it->second; }
// Same treatment for the name file.
142 it = parameters.find("name");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["name"] = inputDir + it->second; }
152 //check for required parameters
// "not open" aborts; "not found" only clears the name here and is rejected further down.
153 blastfile = validParameter.validFile(parameters, "blast", true);
154 if (blastfile == "not open") { blastfile = ""; abort = true; }
155 else if (blastfile == "not found") { blastfile = ""; }
157 //if the user changes the output directory command factory will send this info to us in the output parameter
// When outputdir is absent, the path portion of blastfile is appended to outputDir.
// NOTE(review): the statement between these two lines is not visible in this excerpt.
158 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
160 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
// Optional name file: abort if it cannot be opened, otherwise record it via m->setNameFile.
163 namefile = validParameter.validFile(parameters, "name", true);
164 if (namefile == "not open") { abort = true; }
165 else if (namefile == "not found") { namefile = ""; }
166 else { m->setNameFile(namefile); }
// A blast file is mandatory.
168 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
170 //check for optional parameter and set defaults
// precisionLength is the number of characters in the precision string; execute() passes
// precisionLength-1 to toString when formatting distance labels.
172 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
173 precisionLength = temp.length();
174 m->mothurConvert(temp, precision);
176 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
177 m->mothurConvert(temp, cutoff);
// Pad the cutoff by 5 / (precision * 10); presumably so distances that round to the cutoff
// are still clustered -- TODO confirm.
178 cutoff += (5 / (precision * 10.0));
180 method = validParameter.validFile(parameters, "method", false);
181 if (method == "not found") { method = "average"; }
// Only furthest, nearest and average are accepted; anything else aborts.
183 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
184 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
186 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
187 m->mothurConvert(temp, length);
189 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
190 m->mothurConvert(temp, penalty);
// Boolean switches are interpreted with m->isTrue.
192 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
193 minWanted = m->isTrue(temp);
195 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
196 merge = m->isTrue(temp);
198 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
199 hclusterWanted = m->isTrue(temp);
201 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
202 hard = m->isTrue(temp);
// Report any exception under this class/function name.
206 catch(exception& e) {
207 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
211 //**********************************************************************************************************************
// Runs the mgcluster command: reads the blast file, clusters the sequences with either the
// in-memory cluster classes or HCluster, and writes the .list, .rabund and .sabund output.
// Returns 0 or 2 immediately when the constructor set abort (0 if help/citation was shown).
// Throughout, m->control_pressed is polled; when set, allocated objects are deleted, the
// output streams are closed and the partially written output files are removed.
//
// Fix: in the hcluster branch the sequence-to-bin map is now read from `hcluster`, the object
// created and configured with setMapWanted(true) in that branch. It was previously read
// through `cluster`, which is only assigned in the non-hcluster branch.
//
// NOTE(review): many lines of this function (try, several if/else headers, closing braces,
// declarations of tag/start/rndDist and the calls that print each level) are not visible in
// this excerpt; comments below describe only the visible statements.
212 int MGClusterCommand::execute(){
// Nothing to do if the constructor aborted.
215 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Build the name assignment from the name file when one was given, otherwise an empty one.
218 if (namefile != "") {
219 nameMap = new NameAssignment(namefile);
221 }else{ nameMap= new NameAssignment(); }
// Output files are named from outputDir plus the root of the blast file's simple name.
223 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
226 float previousDist = 0.00000;
227 float rndPreviousDist = 0.00000;
229 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
230 //must remember to delete those objects here since readBlast does not
231 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
234 list = new ListVector(nameMap->getListVector());
235 RAbundVector* rabund = NULL;
// NOTE(review): the lines surrounding the next three statements are not visible here, so it
// is unclear whether they are active code, alternative branches or commented out -- confirm.
237 map<string, int> nameMapCounts = m->readNames(namefile);
238 RAbundVector* rabund = newFunctionToCreateRabund(list, nameMapCounts);
240 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
244 //list = new ListVector(nameMap->getListVector());
245 //RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
247 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
// Sequence-name -> bin-number maps: the current clustering and the one from the previous step.
251 map<string, int> Seq2Bin;
252 map<string, int> oldSeq2Bin;
// Method tag inserted into the output file names.
254 if (method == "furthest") { tag = "fn"; }
255 else if (method == "nearest") { tag = "nn"; }
258 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
259 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
260 string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
262 m->openOutputFile(sabundFileName, sabundFile);
263 m->openOutputFile(rabundFileName, rabundFile);
264 m->openOutputFile(listFileName, listFile);
266 if (m->control_pressed) {
267 delete nameMap; delete read; delete list; delete rabund;
268 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Remember the starting cutoff so a change made during clustering can be reported at the end.
273 double saveCutoff = cutoff;
// ---- Branch 1: cluster in memory with the Cluster classes ----
275 if (!hclusterWanted) {
276 //get distmatrix and overlap
277 SparseMatrix* distMatrix = read->getDistMatrix();
278 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
// Pick the linkage implementation that matches the requested method.
282 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
283 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
284 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
285 cluster->setMapWanted(true);
286 Seq2Bin = cluster->getSeqtoBin();
287 oldSeq2Bin = Seq2Bin;
289 if (m->control_pressed) {
290 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
291 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
296 //cluster using cluster classes
// Keep merging while the smallest remaining distance is below the cutoff and nodes remain.
297 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
299 cluster->update(cutoff);
301 if (m->control_pressed) {
302 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
303 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
308 float dist = distMatrix->getSmallDist();
// Two alternative roundings of the distance; the selecting condition is not visible here
// (presumably `hard`, as in the cutoff report near the end of this function -- TODO confirm).
311 rndDist = m->ceilDist(dist, precision);
313 rndDist = m->roundDist(dist, precision);
// First non-zero distance: the previous level is labelled "unique".
316 if(previousDist <= 0.0000 && dist != previousDist){
317 oldList.setLabel("unique");
// Rounded distance changed: build the merged list for the previous level and label it.
320 else if(rndDist != rndPreviousDist){
322 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
324 if (m->control_pressed) {
325 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
326 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
331 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
335 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
341 rndPreviousDist = rndDist;
// Refresh the bin maps from the in-memory clusterer.
343 Seq2Bin = cluster->getSeqtoBin();
344 oldSeq2Bin = Seq2Bin;
// After the loop: handle the final level the same way.
347 if(previousDist <= 0.0000){
348 oldList.setLabel("unique");
351 else if(rndPreviousDist<cutoff){
353 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
355 if (m->control_pressed) {
356 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
357 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
362 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
366 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
372 overlapMatrix.clear();
// ---- Branch 2: cluster from files with HCluster ----
376 }else { //use hcluster to cluster
377 //get distmatrix and overlap
378 overlapFile = read->getOverlapFile();
379 distFile = read->getDistFile();
382 //sort the distance and overlap files
383 sortHclusterFiles(distFile, overlapFile);
385 if (m->control_pressed) {
386 delete nameMap; delete list; delete rabund;
387 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
393 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
394 hcluster->setMapWanted(true);
// Read the bin map from hcluster: `cluster` is never assigned in this branch.
395 Seq2Bin = hcluster->getSeqtoBin();
396 oldSeq2Bin = Seq2Bin;
398 vector<seqDist> seqs; seqs.resize(1); // to start loop
399 //ifstream inHcluster;
400 //m->openInputFile(distFile, inHcluster);
402 if (m->control_pressed) {
403 delete nameMap; delete list; delete rabund; delete hcluster;
404 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Process batches of sequence pairs until hcluster returns an empty batch.
409 while (seqs.size() != 0){
411 seqs = hcluster->getSeqs();
413 //to account for cutoff change in average neighbor
414 if (seqs.size() != 0) {
415 if (seqs[0].dist > cutoff) { break; }
418 if (m->control_pressed) {
419 delete nameMap; delete list; delete rabund; delete hcluster;
420 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
421 m->mothurRemove(distFile);
422 m->mothurRemove(overlapFile);
427 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
// Pairs whose two indices are equal are skipped.
429 if (seqs[i].seq1 != seqs[i].seq2) {
// update() returns the cutoff to use from here on, which may differ from the current one.
431 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
433 if (m->control_pressed) {
434 delete nameMap; delete list; delete rabund; delete hcluster;
435 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
436 m->mothurRemove(distFile);
437 m->mothurRemove(overlapFile);
// Two alternative roundings; the selecting condition is not visible here (presumably `hard`).
444 rndDist = m->ceilDist(seqs[i].dist, precision);
446 rndDist = m->roundDist(seqs[i].dist, precision);
449 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
450 oldList.setLabel("unique");
453 else if((rndDist != rndPreviousDist)){
455 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
457 if (m->control_pressed) {
458 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
459 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
460 m->mothurRemove(distFile);
461 m->mothurRemove(overlapFile);
466 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
470 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
475 previousDist = seqs[i].dist;
476 rndPreviousDist = rndDist;
// Refresh the bin maps from hcluster (not `cluster`, which is unused in this branch).
478 Seq2Bin = hcluster->getSeqtoBin();
479 oldSeq2Bin = Seq2Bin;
483 //inHcluster.close();
// After the loop: handle the final level the same way as in the in-memory branch.
485 if(previousDist <= 0.0000){
486 oldList.setLabel("unique");
489 else if(rndPreviousDist<cutoff){
491 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
493 if (m->control_pressed) {
494 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
495 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
496 m->mothurRemove(distFile);
497 m->mothurRemove(overlapFile);
502 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
506 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
// The sorted distance and overlap files are removed once clustering is finished.
512 m->mothurRemove(distFile);
513 m->mothurRemove(overlapFile);
522 if (m->control_pressed) {
524 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
// Report the output files and record them in outputNames / outputTypes.
529 m->mothurOutEndLine();
530 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
532 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
533 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
534 m->mothurOutEndLine();
// NOTE(review): saveCutoff is rounded here but the message prints `cutoff` -- confirm which
// value is meant to be reported.
536 if (saveCutoff != cutoff) {
537 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
538 else { saveCutoff = m->roundDist(saveCutoff, precision); }
540 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
543 //set list file as new current listfile
545 itTypes = outputTypes.find("list");
546 if (itTypes != outputTypes.end()) {
547 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
550 //set rabund file as new current rabundfile
551 itTypes = outputTypes.find("rabund");
552 if (itTypes != outputTypes.end()) {
553 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
556 //set sabund file as new current sabundfile
557 itTypes = outputTypes.find("sabund");
558 if (itTypes != outputTypes.end()) {
559 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
// Elapsed wall-clock time since `start` (set in a line not visible in this excerpt).
563 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
// Report any exception under this class/function name.
567 catch(exception& e) {
568 m->errorOut(e, "MGClusterCommand", "execute");
572 //**********************************************************************************************************************
// Writes one clustering level to the three output streams.
//   mergedList - list vector to print; it is written to listFile, its RAbundVector to
//                rabundFile and its SAbundVector to sabundFile.
// NOTE(review): the try line and any statements between the visible lines are not shown in
// this excerpt.
573 void MGClusterCommand::printData(ListVector* mergedList){
575 mergedList->print(listFile);
576 mergedList->getRAbundVector().print(rabundFile);
// The SAbundVector is held in a local before printing.
578 SAbundVector sabund = mergedList->getSAbundVector();
581 sabund.print(sabundFile);
// Report any exception under this class/function name.
583 catch(exception& e) {
584 m->errorOut(e, "MGClusterCommand", "printData");
588 //**********************************************************************************************************************
589 //this merging is just at the reporting level; after this info is printed to the file it is gone and does not affect the data structures
590 //that are used to cluster by distance. this is done so that the overlapping data does not have more influence than the distance data.
// Builds a copy of oldList in which bins are merged according to the overlap data.
//   binInfo - sequence-name -> bin-number map; taken by value, so the updates made here stay
//             local to this call.
//   dist    - threshold; an overlap entry is applied only while its dist is below this value,
//             and the first entry at or above it sets done = true.
// Overlap entries come from overlapMatrix, or from overlapFile when hclusterWanted is set.
// newList is allocated with new; presumably it is what the function returns and the caller
// owns it -- the return statement is not visible in this excerpt, TODO confirm.
//
// Change: the comma search result is compared against string::npos instead of -1. The two are
// equivalent at run time, but npos avoids the signed/unsigned comparison.
//
// NOTE(review): several structural lines (try, loop header, declarations of done, count,
// inOverlap and overlapNode, closing braces) are not visible in this excerpt.
591 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
593 //create new listvector so you don't overwrite the clustering
594 ListVector* newList = new ListVector(oldList);
// Nothing to merge when the overlap source is empty.
600 if (hclusterWanted) {
601 m->openInputFile(overlapFile, inOverlap);
602 if (inOverlap.eof()) { done = true; }
603 }else { if (overlapMatrix.size() == 0) { done = true; } }
606 if (m->control_pressed) {
607 if (hclusterWanted) { inOverlap.close(); }
// Fetch the next overlap entry from memory ...
613 if (!hclusterWanted) {
614 if (count < overlapMatrix.size()) { //do we have another node in the matrix
615 overlapNode = overlapMatrix[count];
// ... or parse it from the overlap file: two names followed by a distance.
619 if (!inOverlap.eof()) {
620 string firstName, secondName;
621 float overlapDistance;
622 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
624 //commented out because we check this in readblast already
625 //map<string,int>::iterator itA = nameMap->find(firstName);
626 //map<string,int>::iterator itB = nameMap->find(secondName);
627 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
628 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
630 //overlapNode.seq1 = itA->second;
631 //overlapNode.seq2 = itB->second;
// Translate the names through nameMap into the values stored in the node.
632 overlapNode.seq1 = nameMap->get(firstName);
633 overlapNode.seq2 = nameMap->get(secondName);
634 overlapNode.dist = overlapDistance;
635 }else { inOverlap.close(); break; }
638 if (overlapNode.dist < dist) {
639 //get names of seqs that overlap
640 string name1 = nameMap->get(overlapNode.seq1);
641 string name2 = nameMap->get(overlapNode.seq2);
643 //use binInfo to find out if they are already in the same bin
644 //map<string, int>::iterator itBin1 = binInfo.find(name1);
645 //map<string, int>::iterator itBin2 = binInfo.find(name2);
647 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
648 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
650 //int binKeep = itBin1->second;
651 //int binRemove = itBin2->second;
// operator[] inserts a 0 entry for a name that is not yet in binInfo.
653 int binKeep = binInfo[name1];
654 int binRemove = binInfo[name2];
656 //if not merge bins and update binInfo
657 if(binKeep != binRemove) {
659 //save names in old bin
660 string names = newList->get(binRemove);
662 //merge bins into name1s bin
663 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
664 newList->set(binRemove, "");
// Re-point every name of the removed bin at binKeep, peeling one comma-separated name per pass.
667 while (names.find_first_of(',') != string::npos) {
669 string name = names.substr(0,names.find_first_of(','));
670 //save name and bin number
671 binInfo[name] = binKeep;
672 names = names.substr(names.find_first_of(',')+1, names.length());
// The remainder after the last comma is the final name.
676 binInfo[names] = binKeep;
// Entries at or above dist end the merge.
679 }else { done = true; }
// Report any exception under this class/function name.
686 catch(exception& e) {
687 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
691 //**********************************************************************************************************************
// Sorts the distance and overlap files used by the hcluster path.
//   unsortedDist    - distance file to sort; it is removed after sorting.
//   unsortedOverlap - overlap file to sort; it is removed after sorting.
// Each file is passed to m->sortFile together with outputDir, and the returned file names
// replace the distFile and overlapFile members.
// NOTE(review): the try line and closing braces are not visible in this excerpt.
692 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
695 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
696 m->mothurRemove(unsortedDist); //delete unsorted file
697 distFile = sortedDistFile;
700 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
701 m->mothurRemove(unsortedOverlap); //delete unsorted file
702 overlapFile = sortedOverlapFile;
// Report any exception under this class/function name.
704 catch(exception& e) {
705 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
710 //**********************************************************************************************************************