5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
18 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
67 string outputFileName = "";
68 map<string, vector<string> >::iterator it;
70 //is this a type this command creates
71 it = outputTypes.find(type);
72 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74 if (type == "list") { outputFileName = "list"; }
75 else if (type == "rabund") { outputFileName = "rabund"; }
76 else if (type == "sabund") { outputFileName = "sabund"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
79 return outputFileName;
82 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["list"] = tempOutNames;
93 outputTypes["rabund"] = tempOutNames;
94 outputTypes["sabund"] = tempOutNames;
97 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
104 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111 vector<string> myArray = setParameters();
113 OptionParser parser(option);
114 map<string, string> parameters = parser.getParameters();
116 ValidParameters validParameter;
117 map<string,string>::iterator it;
119 //check to make sure all parameters are valid for command
120 for (it = parameters.begin(); it != parameters.end(); it++) {
121 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
124 //initialize outputTypes
125 vector<string> tempOutNames;
126 outputTypes["list"] = tempOutNames;
127 outputTypes["rabund"] = tempOutNames;
128 outputTypes["sabund"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("blast");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["blast"] = inputDir + it->second; }
143 it = parameters.find("name");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["name"] = inputDir + it->second; }
153 //check for required parameters
154 blastfile = validParameter.validFile(parameters, "blast", true);
155 if (blastfile == "not open") { blastfile = ""; abort = true; }
156 else if (blastfile == "not found") { blastfile = ""; }
158 //if the user changes the output directory command factory will send this info to us in the output parameter
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
161 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
164 namefile = validParameter.validFile(parameters, "name", true);
165 if (namefile == "not open") { abort = true; }
166 else if (namefile == "not found") { namefile = ""; }
167 else { m->setNameFile(namefile); }
169 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
171 //check for optional parameter and set defaults
173 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "false"; }
174 large = m->isTrue(temp);
176 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
177 precisionLength = temp.length();
178 m->mothurConvert(temp, precision);
180 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
181 m->mothurConvert(temp, cutoff);
182 cutoff += (5 / (precision * 10.0));
184 method = validParameter.validFile(parameters, "method", false);
185 if (method == "not found") { method = "average"; }
187 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
188 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
190 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
191 m->mothurConvert(temp, length);
193 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
194 m->mothurConvert(temp, penalty);
196 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
197 minWanted = m->isTrue(temp);
199 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
200 merge = m->isTrue(temp);
202 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
203 hclusterWanted = m->isTrue(temp);
205 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
206 hard = m->isTrue(temp);
210 catch(exception& e) {
211 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
215 //**********************************************************************************************************************
216 int MGClusterCommand::execute(){
219 if (abort == true) { if (calledHelp) { return 0; } return 2; }
222 if (namefile != "") {
223 nameMap = new NameAssignment(namefile);
225 }else{ nameMap= new NameAssignment(); }
227 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
230 float previousDist = 0.00000;
231 float rndPreviousDist = 0.00000;
233 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
234 //must remember to delete those objects here since readBlast does not
235 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
238 // list = new ListVector(nameMap->getListVector());
239 // RAbundVector* rabund = NULL;
242 // map<string, int> nameMapCounts = m->readNames(namefile);
243 // createRabund(nameMapCounts);
246 // rabund = new RAbundVector(list->getRAbundVector());
250 list = new ListVector(nameMap->getListVector());
251 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
253 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
257 map<string, int> Seq2Bin;
258 map<string, int> oldSeq2Bin;
260 if (method == "furthest") { tag = "fn"; }
261 else if (method == "nearest") { tag = "nn"; }
264 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
265 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
266 string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
268 m->openOutputFile(sabundFileName, sabundFile);
269 m->openOutputFile(rabundFileName, rabundFile);
270 m->openOutputFile(listFileName, listFile);
272 if (m->control_pressed) {
273 delete nameMap; delete read; delete list; delete rabund;
274 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
279 double saveCutoff = cutoff;
281 if (!hclusterWanted) {
282 //get distmatrix and overlap
283 SparseMatrix* distMatrix = read->getDistMatrix();
284 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
288 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
289 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
290 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
291 cluster->setMapWanted(true);
292 Seq2Bin = cluster->getSeqtoBin();
293 oldSeq2Bin = Seq2Bin;
295 if (m->control_pressed) {
296 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
297 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
302 //cluster using cluster classes
303 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
305 cluster->update(cutoff);
307 if (m->control_pressed) {
308 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
309 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
314 float dist = distMatrix->getSmallDist();
317 rndDist = m->ceilDist(dist, precision);
319 rndDist = m->roundDist(dist, precision);
322 if(previousDist <= 0.0000 && dist != previousDist){
323 oldList.setLabel("unique");
326 else if(rndDist != rndPreviousDist){
328 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
330 if (m->control_pressed) {
331 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
332 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
337 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
341 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
347 rndPreviousDist = rndDist;
349 Seq2Bin = cluster->getSeqtoBin();
350 oldSeq2Bin = Seq2Bin;
353 if(previousDist <= 0.0000){
354 oldList.setLabel("unique");
357 else if(rndPreviousDist<cutoff){
359 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
361 if (m->control_pressed) {
362 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
363 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
368 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
372 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
378 overlapMatrix.clear();
382 }else { //use hcluster to cluster
383 //get distmatrix and overlap
384 overlapFile = read->getOverlapFile();
385 distFile = read->getDistFile();
388 //sort the distance and overlap files
389 sortHclusterFiles(distFile, overlapFile);
391 if (m->control_pressed) {
392 delete nameMap; delete list; delete rabund;
393 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
399 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
400 hcluster->setMapWanted(true);
401 Seq2Bin = cluster->getSeqtoBin();
402 oldSeq2Bin = Seq2Bin;
404 vector<seqDist> seqs; seqs.resize(1); // to start loop
405 //ifstream inHcluster;
406 //m->openInputFile(distFile, inHcluster);
408 if (m->control_pressed) {
409 delete nameMap; delete list; delete rabund; delete hcluster;
410 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
415 while (seqs.size() != 0){
417 seqs = hcluster->getSeqs();
419 //to account for cutoff change in average neighbor
420 if (seqs.size() != 0) {
421 if (seqs[0].dist > cutoff) { break; }
424 if (m->control_pressed) {
425 delete nameMap; delete list; delete rabund; delete hcluster;
426 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
427 m->mothurRemove(distFile);
428 m->mothurRemove(overlapFile);
433 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
435 if (seqs[i].seq1 != seqs[i].seq2) {
437 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
439 if (m->control_pressed) {
440 delete nameMap; delete list; delete rabund; delete hcluster;
441 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
442 m->mothurRemove(distFile);
443 m->mothurRemove(overlapFile);
450 rndDist = m->ceilDist(seqs[i].dist, precision);
452 rndDist = m->roundDist(seqs[i].dist, precision);
455 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
456 oldList.setLabel("unique");
459 else if((rndDist != rndPreviousDist)){
461 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
463 if (m->control_pressed) {
464 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
465 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
466 m->mothurRemove(distFile);
467 m->mothurRemove(overlapFile);
472 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
476 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
481 previousDist = seqs[i].dist;
482 rndPreviousDist = rndDist;
484 Seq2Bin = cluster->getSeqtoBin();
485 oldSeq2Bin = Seq2Bin;
489 //inHcluster.close();
491 if(previousDist <= 0.0000){
492 oldList.setLabel("unique");
495 else if(rndPreviousDist<cutoff){
497 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
499 if (m->control_pressed) {
500 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
501 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
502 m->mothurRemove(distFile);
503 m->mothurRemove(overlapFile);
508 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
512 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
518 m->mothurRemove(distFile);
519 m->mothurRemove(overlapFile);
528 if (m->control_pressed) {
530 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
535 m->mothurOutEndLine();
536 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
537 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
538 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
539 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
540 m->mothurOutEndLine();
542 if (saveCutoff != cutoff) {
543 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
544 else { saveCutoff = m->roundDist(saveCutoff, precision); }
546 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
549 //set list file as new current listfile
551 itTypes = outputTypes.find("list");
552 if (itTypes != outputTypes.end()) {
553 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
556 //set rabund file as new current rabundfile
557 itTypes = outputTypes.find("rabund");
558 if (itTypes != outputTypes.end()) {
559 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
562 //set sabund file as new current sabundfile
563 itTypes = outputTypes.find("sabund");
564 if (itTypes != outputTypes.end()) {
565 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
569 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
573 catch(exception& e) {
574 m->errorOut(e, "MGClusterCommand", "execute");
578 //**********************************************************************************************************************
579 void MGClusterCommand::printData(ListVector* mergedList){
581 mergedList->print(listFile);
582 mergedList->getRAbundVector().print(rabundFile);
584 SAbundVector sabund = mergedList->getSAbundVector();
587 sabund.print(sabundFile);
589 catch(exception& e) {
590 m->errorOut(e, "MGClusterCommand", "printData");
594 //**********************************************************************************************************************
595 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
596 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
597 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
599 //create new listvector so you don't overwrite the clustering
600 ListVector* newList = new ListVector(oldList);
606 if (hclusterWanted) {
607 m->openInputFile(overlapFile, inOverlap);
608 if (inOverlap.eof()) { done = true; }
609 }else { if (overlapMatrix.size() == 0) { done = true; } }
612 if (m->control_pressed) {
613 if (hclusterWanted) { inOverlap.close(); }
619 if (!hclusterWanted) {
620 if (count < overlapMatrix.size()) { //do we have another node in the matrix
621 overlapNode = overlapMatrix[count];
625 if (!inOverlap.eof()) {
626 string firstName, secondName;
627 float overlapDistance;
628 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
630 //commented out because we check this in readblast already
631 //map<string,int>::iterator itA = nameMap->find(firstName);
632 //map<string,int>::iterator itB = nameMap->find(secondName);
633 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
634 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
636 //overlapNode.seq1 = itA->second;
637 //overlapNode.seq2 = itB->second;
638 overlapNode.seq1 = nameMap->get(firstName);
639 overlapNode.seq2 = nameMap->get(secondName);
640 overlapNode.dist = overlapDistance;
641 }else { inOverlap.close(); break; }
644 if (overlapNode.dist < dist) {
645 //get names of seqs that overlap
646 string name1 = nameMap->get(overlapNode.seq1);
647 string name2 = nameMap->get(overlapNode.seq2);
649 //use binInfo to find out if they are already in the same bin
650 //map<string, int>::iterator itBin1 = binInfo.find(name1);
651 //map<string, int>::iterator itBin2 = binInfo.find(name2);
653 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
654 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
656 //int binKeep = itBin1->second;
657 //int binRemove = itBin2->second;
659 int binKeep = binInfo[name1];
660 int binRemove = binInfo[name2];
662 //if not merge bins and update binInfo
663 if(binKeep != binRemove) {
665 //save names in old bin
666 string names = newList->get(binRemove);
668 //merge bins into name1s bin
669 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
670 newList->set(binRemove, "");
673 while (names.find_first_of(',') != -1) {
675 string name = names.substr(0,names.find_first_of(','));
676 //save name and bin number
677 binInfo[name] = binKeep;
678 names = names.substr(names.find_first_of(',')+1, names.length());
682 binInfo[names] = binKeep;
685 }else { done = true; }
692 catch(exception& e) {
693 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
697 //**********************************************************************************************************************
698 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
701 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
702 m->mothurRemove(unsortedDist); //delete unsorted file
703 distFile = sortedDistFile;
706 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
707 m->mothurRemove(unsortedOverlap); //delete unsorted file
708 overlapFile = sortedOverlapFile;
710 catch(exception& e) {
711 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
716 //**********************************************************************************************************************
718 //void MGClusterCommand::createRabund(map<string, int> nameMapCounts){
720 // //RAbundVector rav;
721 // map<string,int>::iterator it;
722 // //it = nameMapCounts.begin();
723 // //for(int i = 0; i < list->getNumBins(); i++) { rav.push_back((*it).second); it++; }
724 // for ( it=nameMapCounts.begin(); it!=nameMapCounts.end(); it++ ) { rav.push_back( it->second ); }
727 // catch(exception& e) {
728 // m->errorOut(e, "MGClusterCommand", "createRabund");
734 //**********************************************************************************************************************