5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
18 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
67 string outputFileName = "";
68 map<string, vector<string> >::iterator it;
70 //is this a type this command creates
71 it = outputTypes.find(type);
72 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74 if (type == "list") { outputFileName = "list"; }
75 else if (type == "rabund") { outputFileName = "rabund"; }
76 else if (type == "sabund") { outputFileName = "sabund"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
79 return outputFileName;
82 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["list"] = tempOutNames;
93 outputTypes["rabund"] = tempOutNames;
94 outputTypes["sabund"] = tempOutNames;
97 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
104 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111 vector<string> myArray = setParameters();
113 OptionParser parser(option);
114 map<string, string> parameters = parser.getParameters();
116 ValidParameters validParameter;
117 map<string,string>::iterator it;
119 //check to make sure all parameters are valid for command
120 for (it = parameters.begin(); it != parameters.end(); it++) {
121 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
124 //initialize outputTypes
125 vector<string> tempOutNames;
126 outputTypes["list"] = tempOutNames;
127 outputTypes["rabund"] = tempOutNames;
128 outputTypes["sabund"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("blast");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["blast"] = inputDir + it->second; }
143 it = parameters.find("name");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["name"] = inputDir + it->second; }
153 //check for required parameters
154 blastfile = validParameter.validFile(parameters, "blast", true);
155 if (blastfile == "not open") { blastfile = ""; abort = true; }
156 else if (blastfile == "not found") { blastfile = ""; }
158 //if the user changes the output directory command factory will send this info to us in the output parameter
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
161 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
164 namefile = validParameter.validFile(parameters, "name", true);
165 if (namefile == "not open") { abort = true; }
166 else if (namefile == "not found") { namefile = ""; }
167 else { m->setNameFile(namefile); }
169 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
171 //check for optional parameter and set defaults
173 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "false"; }
174 large = m->isTrue(temp);
176 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
177 precisionLength = temp.length();
178 m->mothurConvert(temp, precision);
180 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
181 m->mothurConvert(temp, cutoff);
182 cutoff += (5 / (precision * 10.0));
184 method = validParameter.validFile(parameters, "method", false);
185 if (method == "not found") { method = "average"; }
187 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
188 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
190 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
191 m->mothurConvert(temp, length);
193 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
194 m->mothurConvert(temp, penalty);
196 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
197 minWanted = m->isTrue(temp);
199 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
200 merge = m->isTrue(temp);
202 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
203 hclusterWanted = m->isTrue(temp);
205 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
206 hard = m->isTrue(temp);
210 catch(exception& e) {
211 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
215 //**********************************************************************************************************************
216 int MGClusterCommand::execute(){
218 if (abort == true) { if (calledHelp) { return 0; } return 2; }
221 if (namefile != "") {
222 nameMap = new NameAssignment(namefile);
224 }else{ nameMap= new NameAssignment(); }
226 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
229 float previousDist = 0.00000;
230 float rndPreviousDist = 0.00000;
232 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
233 //must remember to delete those objects here since readBlast does not
234 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
237 list = new ListVector(nameMap->getListVector());
238 RAbundVector* rabund = NULL;
241 map<string, int> nameMapCounts = m->readNames(namefile);
242 createRabund(nameMapCounts);
245 rabund = new RAbundVector(list->getRAbundVector());
249 //list = new ListVector(nameMap->getListVector());
250 //rabund = new RAbundVector(list->getRAbundVector());
252 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
256 map<string, int> Seq2Bin;
257 map<string, int> oldSeq2Bin;
259 if (method == "furthest") { tag = "fn"; }
260 else if (method == "nearest") { tag = "nn"; }
263 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
264 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
265 string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
267 m->openOutputFile(sabundFileName, sabundFile);
268 m->openOutputFile(rabundFileName, rabundFile);
269 m->openOutputFile(listFileName, listFile);
271 if (m->control_pressed) {
272 delete nameMap; delete read; delete list; delete rabund;
273 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
278 double saveCutoff = cutoff;
280 if (!hclusterWanted) {
281 //get distmatrix and overlap
282 SparseMatrix* distMatrix = read->getDistMatrix();
283 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
287 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
288 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
289 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
290 cluster->setMapWanted(true);
291 Seq2Bin = cluster->getSeqtoBin();
292 oldSeq2Bin = Seq2Bin;
294 if (m->control_pressed) {
295 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
296 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
301 //cluster using cluster classes
302 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
304 cluster->update(cutoff);
306 if (m->control_pressed) {
307 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
308 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
313 float dist = distMatrix->getSmallDist();
316 rndDist = m->ceilDist(dist, precision);
318 rndDist = m->roundDist(dist, precision);
321 if(previousDist <= 0.0000 && dist != previousDist){
322 oldList.setLabel("unique");
325 else if(rndDist != rndPreviousDist){
327 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
329 if (m->control_pressed) {
330 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
331 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
336 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
340 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
346 rndPreviousDist = rndDist;
348 Seq2Bin = cluster->getSeqtoBin();
349 oldSeq2Bin = Seq2Bin;
352 if(previousDist <= 0.0000){
353 oldList.setLabel("unique");
356 else if(rndPreviousDist<cutoff){
358 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
360 if (m->control_pressed) {
361 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
362 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
367 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
371 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
377 overlapMatrix.clear();
381 }else { //use hcluster to cluster
382 //get distmatrix and overlap
383 overlapFile = read->getOverlapFile();
384 distFile = read->getDistFile();
387 //sort the distance and overlap files
388 sortHclusterFiles(distFile, overlapFile);
390 if (m->control_pressed) {
391 delete nameMap; delete list; delete rabund;
392 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
398 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
399 hcluster->setMapWanted(true);
400 Seq2Bin = cluster->getSeqtoBin();
401 oldSeq2Bin = Seq2Bin;
403 vector<seqDist> seqs; seqs.resize(1); // to start loop
404 //ifstream inHcluster;
405 //m->openInputFile(distFile, inHcluster);
407 if (m->control_pressed) {
408 delete nameMap; delete list; delete rabund; delete hcluster;
409 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
414 while (seqs.size() != 0){
416 seqs = hcluster->getSeqs();
418 //to account for cutoff change in average neighbor
419 if (seqs.size() != 0) {
420 if (seqs[0].dist > cutoff) { break; }
423 if (m->control_pressed) {
424 delete nameMap; delete list; delete rabund; delete hcluster;
425 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
426 m->mothurRemove(distFile);
427 m->mothurRemove(overlapFile);
432 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
434 if (seqs[i].seq1 != seqs[i].seq2) {
436 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
438 if (m->control_pressed) {
439 delete nameMap; delete list; delete rabund; delete hcluster;
440 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
441 m->mothurRemove(distFile);
442 m->mothurRemove(overlapFile);
449 rndDist = m->ceilDist(seqs[i].dist, precision);
451 rndDist = m->roundDist(seqs[i].dist, precision);
454 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
455 oldList.setLabel("unique");
458 else if((rndDist != rndPreviousDist)){
460 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
462 if (m->control_pressed) {
463 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
464 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
465 m->mothurRemove(distFile);
466 m->mothurRemove(overlapFile);
471 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
475 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
480 previousDist = seqs[i].dist;
481 rndPreviousDist = rndDist;
483 Seq2Bin = cluster->getSeqtoBin();
484 oldSeq2Bin = Seq2Bin;
488 //inHcluster.close();
490 if(previousDist <= 0.0000){
491 oldList.setLabel("unique");
494 else if(rndPreviousDist<cutoff){
496 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
498 if (m->control_pressed) {
499 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
500 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
501 m->mothurRemove(distFile);
502 m->mothurRemove(overlapFile);
507 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
511 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
517 m->mothurRemove(distFile);
518 m->mothurRemove(overlapFile);
522 if (!large) {delete rabund;}
527 if (m->control_pressed) {
529 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
534 m->mothurOutEndLine();
535 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
536 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
537 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
538 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
539 m->mothurOutEndLine();
541 if (saveCutoff != cutoff) {
542 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
543 else { saveCutoff = m->roundDist(saveCutoff, precision); }
545 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
548 //set list file as new current listfile
550 itTypes = outputTypes.find("list");
551 if (itTypes != outputTypes.end()) {
552 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
555 //set rabund file as new current rabundfile
556 itTypes = outputTypes.find("rabund");
557 if (itTypes != outputTypes.end()) {
558 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
561 //set sabund file as new current sabundfile
562 itTypes = outputTypes.find("sabund");
563 if (itTypes != outputTypes.end()) {
564 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
568 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
572 catch(exception& e) {
573 m->errorOut(e, "MGClusterCommand", "execute");
577 //**********************************************************************************************************************
578 void MGClusterCommand::printData(ListVector* mergedList){
580 mergedList->print(listFile);
581 mergedList->getRAbundVector().print(rabundFile);
583 SAbundVector sabund = mergedList->getSAbundVector();
586 sabund.print(sabundFile);
588 catch(exception& e) {
589 m->errorOut(e, "MGClusterCommand", "printData");
593 //**********************************************************************************************************************
594 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
595 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
596 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
598 //create new listvector so you don't overwrite the clustering
599 ListVector* newList = new ListVector(oldList);
605 if (hclusterWanted) {
606 m->openInputFile(overlapFile, inOverlap);
607 if (inOverlap.eof()) { done = true; }
608 }else { if (overlapMatrix.size() == 0) { done = true; } }
611 if (m->control_pressed) {
612 if (hclusterWanted) { inOverlap.close(); }
618 if (!hclusterWanted) {
619 if (count < overlapMatrix.size()) { //do we have another node in the matrix
620 overlapNode = overlapMatrix[count];
624 if (!inOverlap.eof()) {
625 string firstName, secondName;
626 float overlapDistance;
627 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
629 //commented out because we check this in readblast already
630 //map<string,int>::iterator itA = nameMap->find(firstName);
631 //map<string,int>::iterator itB = nameMap->find(secondName);
632 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
633 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
635 //overlapNode.seq1 = itA->second;
636 //overlapNode.seq2 = itB->second;
637 overlapNode.seq1 = nameMap->get(firstName);
638 overlapNode.seq2 = nameMap->get(secondName);
639 overlapNode.dist = overlapDistance;
640 }else { inOverlap.close(); break; }
643 if (overlapNode.dist < dist) {
644 //get names of seqs that overlap
645 string name1 = nameMap->get(overlapNode.seq1);
646 string name2 = nameMap->get(overlapNode.seq2);
648 //use binInfo to find out if they are already in the same bin
649 //map<string, int>::iterator itBin1 = binInfo.find(name1);
650 //map<string, int>::iterator itBin2 = binInfo.find(name2);
652 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
653 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
655 //int binKeep = itBin1->second;
656 //int binRemove = itBin2->second;
658 int binKeep = binInfo[name1];
659 int binRemove = binInfo[name2];
661 //if not merge bins and update binInfo
662 if(binKeep != binRemove) {
664 //save names in old bin
665 string names = newList->get(binRemove);
667 //merge bins into name1s bin
668 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
669 newList->set(binRemove, "");
672 while (names.find_first_of(',') != -1) {
674 string name = names.substr(0,names.find_first_of(','));
675 //save name and bin number
676 binInfo[name] = binKeep;
677 names = names.substr(names.find_first_of(',')+1, names.length());
681 binInfo[names] = binKeep;
684 }else { done = true; }
691 catch(exception& e) {
692 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
696 //**********************************************************************************************************************
697 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
700 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
701 m->mothurRemove(unsortedDist); //delete unsorted file
702 distFile = sortedDistFile;
705 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
706 m->mothurRemove(unsortedOverlap); //delete unsorted file
707 overlapFile = sortedOverlapFile;
709 catch(exception& e) {
710 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
715 //**********************************************************************************************************************
717 void MGClusterCommand::createRabund(map<string, int> nameMapCounts){
720 map<string,int>::iterator it;
721 //it = nameMapCounts.begin();
722 //for(int i = 0; i < list->getNumBins(); i++) { rav.push_back((*it).second); it++; }
723 for ( it=nameMapCounts.begin(); it!=nameMapCounts.end(); it++ ) { rav.push_back( it->second ); }
726 catch(exception& e) {
727 m->errorOut(e, "MGClusterCommand", "createRabund");
733 //**********************************************************************************************************************