5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
18 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
67 string outputFileName = "";
68 map<string, vector<string> >::iterator it;
70 //is this a type this command creates
71 it = outputTypes.find(type);
72 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74 if (type == "list") { outputFileName = "list"; }
75 else if (type == "rabund") { outputFileName = "rabund"; }
76 else if (type == "sabund") { outputFileName = "sabund"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
79 return outputFileName;
82 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["list"] = tempOutNames;
93 outputTypes["rabund"] = tempOutNames;
94 outputTypes["sabund"] = tempOutNames;
97 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
104 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111 vector<string> myArray = setParameters();
113 OptionParser parser(option);
114 map<string, string> parameters = parser.getParameters();
116 ValidParameters validParameter;
117 map<string,string>::iterator it;
119 //check to make sure all parameters are valid for command
120 for (it = parameters.begin(); it != parameters.end(); it++) {
121 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
124 //initialize outputTypes
125 vector<string> tempOutNames;
126 outputTypes["list"] = tempOutNames;
127 outputTypes["rabund"] = tempOutNames;
128 outputTypes["sabund"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("blast");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["blast"] = inputDir + it->second; }
143 it = parameters.find("name");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["name"] = inputDir + it->second; }
153 //check for required parameters
154 blastfile = validParameter.validFile(parameters, "blast", true);
155 if (blastfile == "not open") { blastfile = ""; abort = true; }
156 else if (blastfile == "not found") { blastfile = ""; }
158 //if the user changes the output directory command factory will send this info to us in the output parameter
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
161 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
164 namefile = validParameter.validFile(parameters, "name", true);
165 if (namefile == "not open") { abort = true; }
166 else if (namefile == "not found") { namefile = ""; }
167 else { m->setNameFile(namefile); }
169 countfile = validParameter.validFile(parameters, "count", true);
170 if (countfile == "not open") { abort = true; }
171 else if (countfile == "not found") { countfile = ""; }
172 else { m->setCountTableFile(countfile); }
174 if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
176 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
178 //check for optional parameter and set defaults
180 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
181 precisionLength = temp.length();
182 m->mothurConvert(temp, precision);
184 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
185 m->mothurConvert(temp, cutoff);
186 cutoff += (5 / (precision * 10.0));
188 method = validParameter.validFile(parameters, "method", false);
189 if (method == "not found") { method = "average"; }
191 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
192 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
194 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
195 m->mothurConvert(temp, length);
197 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
198 m->mothurConvert(temp, penalty);
200 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
201 minWanted = m->isTrue(temp);
203 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
204 merge = m->isTrue(temp);
206 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
207 hclusterWanted = m->isTrue(temp);
209 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
210 hard = m->isTrue(temp);
214 catch(exception& e) {
215 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
219 //**********************************************************************************************************************
220 int MGClusterCommand::execute(){
222 if (abort == true) { if (calledHelp) { return 0; } return 2; }
225 if (namefile != "") {
226 nameMap = new NameAssignment(namefile);
228 }else{ nameMap= new NameAssignment(); }
230 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
233 float previousDist = 0.00000;
234 float rndPreviousDist = 0.00000;
236 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
237 //must remember to delete those objects here since readBlast does not
238 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
241 list = new ListVector(nameMap->getListVector());
242 RAbundVector* rabund = NULL;
244 if(countfile != "") {
245 //map<string, int> nameMapCounts = m->readNames(namefile);
246 ct = new CountTable();
247 ct->readTable(countfile);
248 rabund = new RAbundVector();
249 createRabund(ct, list, rabund);
251 rabund = new RAbundVector(list->getRAbundVector());
255 //list = new ListVector(nameMap->getListVector());
256 //rabund = new RAbundVector(list->getRAbundVector());
258 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
262 map<string, int> Seq2Bin;
263 map<string, int> oldSeq2Bin;
265 if (method == "furthest") { tag = "fn"; }
266 else if (method == "nearest") { tag = "nn"; }
269 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
270 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
271 string listFileName = fileroot+ tag + ".";
272 if (countfile != "") { listFileName += "unique_"; }
273 listFileName += getOutputFileNameTag("list");
275 if (countfile == "") {
276 m->openOutputFile(sabundFileName, sabundFile);
277 m->openOutputFile(rabundFileName, rabundFile);
279 m->openOutputFile(listFileName, listFile);
281 if (m->control_pressed) {
282 delete nameMap; delete read; delete list; delete rabund;
283 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
288 double saveCutoff = cutoff;
290 if (!hclusterWanted) {
291 //get distmatrix and overlap
292 SparseDistanceMatrix* distMatrix = read->getDistMatrix();
293 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
297 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
298 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
299 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
300 cluster->setMapWanted(true);
301 Seq2Bin = cluster->getSeqtoBin();
302 oldSeq2Bin = Seq2Bin;
304 if (m->control_pressed) {
305 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
306 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
311 //cluster using cluster classes
312 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
314 cluster->update(cutoff);
316 if (m->control_pressed) {
317 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
318 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
323 float dist = distMatrix->getSmallDist();
326 rndDist = m->ceilDist(dist, precision);
328 rndDist = m->roundDist(dist, precision);
331 if(previousDist <= 0.0000 && dist != previousDist){
332 oldList.setLabel("unique");
335 else if(rndDist != rndPreviousDist){
337 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
339 if (m->control_pressed) {
340 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
341 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
346 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
350 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
356 rndPreviousDist = rndDist;
358 Seq2Bin = cluster->getSeqtoBin();
359 oldSeq2Bin = Seq2Bin;
362 if(previousDist <= 0.0000){
363 oldList.setLabel("unique");
366 else if(rndPreviousDist<cutoff){
368 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
370 if (m->control_pressed) {
371 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
372 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
377 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
381 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
387 overlapMatrix.clear();
391 }else { //use hcluster to cluster
392 //get distmatrix and overlap
393 overlapFile = read->getOverlapFile();
394 distFile = read->getDistFile();
397 //sort the distance and overlap files
398 sortHclusterFiles(distFile, overlapFile);
400 if (m->control_pressed) {
401 delete nameMap; delete list; delete rabund;
402 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
408 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
409 hcluster->setMapWanted(true);
410 Seq2Bin = cluster->getSeqtoBin();
411 oldSeq2Bin = Seq2Bin;
413 vector<seqDist> seqs; seqs.resize(1); // to start loop
414 //ifstream inHcluster;
415 //m->openInputFile(distFile, inHcluster);
417 if (m->control_pressed) {
418 delete nameMap; delete list; delete rabund; delete hcluster;
419 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
424 while (seqs.size() != 0){
426 seqs = hcluster->getSeqs();
428 //to account for cutoff change in average neighbor
429 if (seqs.size() != 0) {
430 if (seqs[0].dist > cutoff) { break; }
433 if (m->control_pressed) {
434 delete nameMap; delete list; delete rabund; delete hcluster;
435 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
436 m->mothurRemove(distFile);
437 m->mothurRemove(overlapFile);
442 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
444 if (seqs[i].seq1 != seqs[i].seq2) {
446 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
448 if (m->control_pressed) {
449 delete nameMap; delete list; delete rabund; delete hcluster;
450 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
451 m->mothurRemove(distFile);
452 m->mothurRemove(overlapFile);
459 rndDist = m->ceilDist(seqs[i].dist, precision);
461 rndDist = m->roundDist(seqs[i].dist, precision);
464 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
465 oldList.setLabel("unique");
468 else if((rndDist != rndPreviousDist)){
470 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
472 if (m->control_pressed) {
473 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
474 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
475 m->mothurRemove(distFile);
476 m->mothurRemove(overlapFile);
481 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
485 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
490 previousDist = seqs[i].dist;
491 rndPreviousDist = rndDist;
493 Seq2Bin = cluster->getSeqtoBin();
494 oldSeq2Bin = Seq2Bin;
498 //inHcluster.close();
500 if(previousDist <= 0.0000){
501 oldList.setLabel("unique");
504 else if(rndPreviousDist<cutoff){
506 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
508 if (m->control_pressed) {
509 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
510 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
511 m->mothurRemove(distFile);
512 m->mothurRemove(overlapFile);
517 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
521 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
527 m->mothurRemove(distFile);
528 m->mothurRemove(overlapFile);
534 if (countfile == "") {
538 if (m->control_pressed) {
540 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
545 m->mothurOutEndLine();
546 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
547 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
548 if (countfile == "") {
549 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
550 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
552 m->mothurOutEndLine();
554 if (saveCutoff != cutoff) {
555 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
556 else { saveCutoff = m->roundDist(saveCutoff, precision); }
558 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
561 //set list file as new current listfile
563 itTypes = outputTypes.find("list");
564 if (itTypes != outputTypes.end()) {
565 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
568 //set rabund file as new current rabundfile
569 itTypes = outputTypes.find("rabund");
570 if (itTypes != outputTypes.end()) {
571 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
574 //set sabund file as new current sabundfile
575 itTypes = outputTypes.find("sabund");
576 if (itTypes != outputTypes.end()) {
577 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
581 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
585 catch(exception& e) {
586 m->errorOut(e, "MGClusterCommand", "execute");
590 //**********************************************************************************************************************
591 void MGClusterCommand::printData(ListVector* mergedList){
593 mergedList->print(listFile);
594 SAbundVector sabund = mergedList->getSAbundVector();
596 if (countfile == "") {
597 mergedList->getRAbundVector().print(rabundFile);
598 sabund.print(sabundFile);
603 catch(exception& e) {
604 m->errorOut(e, "MGClusterCommand", "printData");
608 //**********************************************************************************************************************
609 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
610 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
611 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
613 //create new listvector so you don't overwrite the clustering
614 ListVector* newList = new ListVector(oldList);
620 if (hclusterWanted) {
621 m->openInputFile(overlapFile, inOverlap);
622 if (inOverlap.eof()) { done = true; }
623 }else { if (overlapMatrix.size() == 0) { done = true; } }
626 if (m->control_pressed) {
627 if (hclusterWanted) { inOverlap.close(); }
633 if (!hclusterWanted) {
634 if (count < overlapMatrix.size()) { //do we have another node in the matrix
635 overlapNode = overlapMatrix[count];
639 if (!inOverlap.eof()) {
640 string firstName, secondName;
641 float overlapDistance;
642 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
644 //commented out because we check this in readblast already
645 //map<string,int>::iterator itA = nameMap->find(firstName);
646 //map<string,int>::iterator itB = nameMap->find(secondName);
647 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
648 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
650 //overlapNode.seq1 = itA->second;
651 //overlapNode.seq2 = itB->second;
652 overlapNode.seq1 = nameMap->get(firstName);
653 overlapNode.seq2 = nameMap->get(secondName);
654 overlapNode.dist = overlapDistance;
655 }else { inOverlap.close(); break; }
658 if (overlapNode.dist < dist) {
659 //get names of seqs that overlap
660 string name1 = nameMap->get(overlapNode.seq1);
661 string name2 = nameMap->get(overlapNode.seq2);
663 //use binInfo to find out if they are already in the same bin
664 //map<string, int>::iterator itBin1 = binInfo.find(name1);
665 //map<string, int>::iterator itBin2 = binInfo.find(name2);
667 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
668 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
670 //int binKeep = itBin1->second;
671 //int binRemove = itBin2->second;
673 int binKeep = binInfo[name1];
674 int binRemove = binInfo[name2];
676 //if not merge bins and update binInfo
677 if(binKeep != binRemove) {
679 //save names in old bin
680 string names = newList->get(binRemove);
682 //merge bins into name1s bin
683 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
684 newList->set(binRemove, "");
687 while (names.find_first_of(',') != -1) {
689 string name = names.substr(0,names.find_first_of(','));
690 //save name and bin number
691 binInfo[name] = binKeep;
692 names = names.substr(names.find_first_of(',')+1, names.length());
696 binInfo[names] = binKeep;
699 }else { done = true; }
706 catch(exception& e) {
707 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
711 //**********************************************************************************************************************
712 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
715 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
716 m->mothurRemove(unsortedDist); //delete unsorted file
717 distFile = sortedDistFile;
720 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
721 m->mothurRemove(unsortedOverlap); //delete unsorted file
722 overlapFile = sortedOverlapFile;
724 catch(exception& e) {
725 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
730 //**********************************************************************************************************************
732 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
734 //vector<string> names = ct.getNamesOfSeqs();
736 //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
739 for(int i = 0; i < list->getNumBins(); i++) {
740 vector<string> binNames;
741 string bin = list->get(i);
742 m->splitAtComma(bin, binNames);
744 for (int j = 0; j < binNames.size(); j++) {
745 total += ct->getNumSeqs(binNames[j]);
747 rabund->push_back(total);
752 catch(exception& e) {
753 m->errorOut(e, "MGClusterCommand", "createRabund");
759 //**********************************************************************************************************************