5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
18 //CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
19 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
20 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
21 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
22 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
23 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
24 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
25 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
26 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
27 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
28 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31 vector<string> myArray;
32 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
36 m->errorOut(e, "MGClusterCommand", "setParameters");
40 //**********************************************************************************************************************
41 string MGClusterCommand::getHelpString(){
43 string helpString = "";
44 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
45 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
46 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
47 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
48 helpString += "The precision parameter's default value is 100. \n";
49 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
50 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
51 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
52 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
53 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
54 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
55 helpString += "The mgcluster command should be in the following format: \n";
56 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
57 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
61 m->errorOut(e, "MGClusterCommand", "getHelpString");
65 //**********************************************************************************************************************
66 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
68 string outputFileName = "";
69 map<string, vector<string> >::iterator it;
71 //is this a type this command creates
72 it = outputTypes.find(type);
73 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75 if (type == "list") { outputFileName = "list"; }
76 else if (type == "rabund") { outputFileName = "rabund"; }
77 else if (type == "sabund") { outputFileName = "sabund"; }
78 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
80 return outputFileName;
83 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
87 //**********************************************************************************************************************
88 MGClusterCommand::MGClusterCommand(){
90 abort = true; calledHelp = true;
92 vector<string> tempOutNames;
93 outputTypes["list"] = tempOutNames;
94 outputTypes["rabund"] = tempOutNames;
95 outputTypes["sabund"] = tempOutNames;
98 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
102 //**********************************************************************************************************************
103 MGClusterCommand::MGClusterCommand(string option) {
105 abort = false; calledHelp = false;
107 //allow user to run help
108 if(option == "help") { help(); abort = true; calledHelp = true; }
109 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
112 vector<string> myArray = setParameters();
114 OptionParser parser(option);
115 map<string, string> parameters = parser.getParameters();
117 ValidParameters validParameter;
118 map<string,string>::iterator it;
120 //check to make sure all parameters are valid for command
121 for (it = parameters.begin(); it != parameters.end(); it++) {
122 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
125 //initialize outputTypes
126 vector<string> tempOutNames;
127 outputTypes["list"] = tempOutNames;
128 outputTypes["rabund"] = tempOutNames;
129 outputTypes["sabund"] = tempOutNames;
131 //if the user changes the input directory command factory will send this info to us in the output parameter
132 string inputDir = validParameter.validFile(parameters, "inputdir", false);
133 if (inputDir == "not found"){ inputDir = ""; }
136 it = parameters.find("blast");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["blast"] = inputDir + it->second; }
144 it = parameters.find("name");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["name"] = inputDir + it->second; }
154 //check for required parameters
155 blastfile = validParameter.validFile(parameters, "blast", true);
156 if (blastfile == "not open") { blastfile = ""; abort = true; }
157 else if (blastfile == "not found") { blastfile = ""; }
159 //if the user changes the output directory command factory will send this info to us in the output parameter
160 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
162 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
165 namefile = validParameter.validFile(parameters, "name", true);
166 if (namefile == "not open") { abort = true; }
167 else if (namefile == "not found") { namefile = ""; }
168 else { m->setNameFile(namefile); }
170 countfile = validParameter.validFile(parameters, "count", true);
171 if (countfile == "not open") { abort = true; }
172 else if (countfile == "not found") { countfile = ""; }
173 else { m->setCountTableFile(countfile); }
175 if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
177 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
179 //check for optional parameter and set defaults
181 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "false"; }
182 large = m->isTrue(temp);
184 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
185 precisionLength = temp.length();
186 m->mothurConvert(temp, precision);
188 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
189 m->mothurConvert(temp, cutoff);
190 cutoff += (5 / (precision * 10.0));
192 method = validParameter.validFile(parameters, "method", false);
193 if (method == "not found") { method = "average"; }
195 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
196 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
198 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
199 m->mothurConvert(temp, length);
201 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
202 m->mothurConvert(temp, penalty);
204 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
205 minWanted = m->isTrue(temp);
207 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
208 merge = m->isTrue(temp);
210 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
211 hclusterWanted = m->isTrue(temp);
213 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
214 hard = m->isTrue(temp);
218 catch(exception& e) {
219 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
223 //**********************************************************************************************************************
224 int MGClusterCommand::execute(){
226 if (abort == true) { if (calledHelp) { return 0; } return 2; }
229 if (namefile != "") {
230 nameMap = new NameAssignment(namefile);
232 }else{ nameMap= new NameAssignment(); }
234 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
237 float previousDist = 0.00000;
238 float rndPreviousDist = 0.00000;
240 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
241 //must remember to delete those objects here since readBlast does not
242 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
245 list = new ListVector(nameMap->getListVector());
246 RAbundVector* rabund = NULL;
248 if(countfile != "") {
249 //map<string, int> nameMapCounts = m->readNames(namefile);
250 ct = new CountTable();
251 ct->readTable(countfile);
252 rabund = new RAbundVector();
253 createRabund(ct, list, rabund);
255 rabund = new RAbundVector(list->getRAbundVector());
259 //list = new ListVector(nameMap->getListVector());
260 //rabund = new RAbundVector(list->getRAbundVector());
262 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
266 map<string, int> Seq2Bin;
267 map<string, int> oldSeq2Bin;
269 if (method == "furthest") { tag = "fn"; }
270 else if (method == "nearest") { tag = "nn"; }
273 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
274 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
275 string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
277 m->openOutputFile(sabundFileName, sabundFile);
278 m->openOutputFile(rabundFileName, rabundFile);
279 m->openOutputFile(listFileName, listFile);
281 if (m->control_pressed) {
282 delete nameMap; delete read; delete list; delete rabund;
283 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
288 double saveCutoff = cutoff;
290 if (!hclusterWanted) {
291 //get distmatrix and overlap
292 SparseMatrix* distMatrix = read->getDistMatrix();
293 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
297 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
298 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
299 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
300 cluster->setMapWanted(true);
301 Seq2Bin = cluster->getSeqtoBin();
302 oldSeq2Bin = Seq2Bin;
304 if (m->control_pressed) {
305 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
306 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
311 //cluster using cluster classes
312 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
314 cluster->update(cutoff);
316 if (m->control_pressed) {
317 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
318 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
323 float dist = distMatrix->getSmallDist();
326 rndDist = m->ceilDist(dist, precision);
328 rndDist = m->roundDist(dist, precision);
331 if(previousDist <= 0.0000 && dist != previousDist){
332 oldList.setLabel("unique");
335 else if(rndDist != rndPreviousDist){
337 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
339 if (m->control_pressed) {
340 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
341 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
346 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
350 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
356 rndPreviousDist = rndDist;
358 Seq2Bin = cluster->getSeqtoBin();
359 oldSeq2Bin = Seq2Bin;
362 if(previousDist <= 0.0000){
363 oldList.setLabel("unique");
366 else if(rndPreviousDist<cutoff){
368 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
370 if (m->control_pressed) {
371 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
372 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
377 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
381 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
387 overlapMatrix.clear();
391 }else { //use hcluster to cluster
392 //get distmatrix and overlap
393 overlapFile = read->getOverlapFile();
394 distFile = read->getDistFile();
397 //sort the distance and overlap files
398 sortHclusterFiles(distFile, overlapFile);
400 if (m->control_pressed) {
401 delete nameMap; delete list; delete rabund;
402 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
408 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
409 hcluster->setMapWanted(true);
410 Seq2Bin = cluster->getSeqtoBin();
411 oldSeq2Bin = Seq2Bin;
413 vector<seqDist> seqs; seqs.resize(1); // to start loop
414 //ifstream inHcluster;
415 //m->openInputFile(distFile, inHcluster);
417 if (m->control_pressed) {
418 delete nameMap; delete list; delete rabund; delete hcluster;
419 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
424 while (seqs.size() != 0){
426 seqs = hcluster->getSeqs();
428 //to account for cutoff change in average neighbor
429 if (seqs.size() != 0) {
430 if (seqs[0].dist > cutoff) { break; }
433 if (m->control_pressed) {
434 delete nameMap; delete list; delete rabund; delete hcluster;
435 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
436 m->mothurRemove(distFile);
437 m->mothurRemove(overlapFile);
442 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
444 if (seqs[i].seq1 != seqs[i].seq2) {
446 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
448 if (m->control_pressed) {
449 delete nameMap; delete list; delete rabund; delete hcluster;
450 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
451 m->mothurRemove(distFile);
452 m->mothurRemove(overlapFile);
459 rndDist = m->ceilDist(seqs[i].dist, precision);
461 rndDist = m->roundDist(seqs[i].dist, precision);
464 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
465 oldList.setLabel("unique");
468 else if((rndDist != rndPreviousDist)){
470 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
472 if (m->control_pressed) {
473 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
474 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
475 m->mothurRemove(distFile);
476 m->mothurRemove(overlapFile);
481 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
485 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
490 previousDist = seqs[i].dist;
491 rndPreviousDist = rndDist;
493 Seq2Bin = cluster->getSeqtoBin();
494 oldSeq2Bin = Seq2Bin;
498 //inHcluster.close();
500 if(previousDist <= 0.0000){
501 oldList.setLabel("unique");
504 else if(rndPreviousDist<cutoff){
506 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
508 if (m->control_pressed) {
509 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
510 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
511 m->mothurRemove(distFile);
512 m->mothurRemove(overlapFile);
517 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
521 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
527 m->mothurRemove(distFile);
528 m->mothurRemove(overlapFile);
532 if (!large) {delete rabund;}
537 if (m->control_pressed) {
539 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
544 m->mothurOutEndLine();
545 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
546 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
547 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
548 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
549 m->mothurOutEndLine();
551 if (saveCutoff != cutoff) {
552 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
553 else { saveCutoff = m->roundDist(saveCutoff, precision); }
555 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
558 //set list file as new current listfile
560 itTypes = outputTypes.find("list");
561 if (itTypes != outputTypes.end()) {
562 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
565 //set rabund file as new current rabundfile
566 itTypes = outputTypes.find("rabund");
567 if (itTypes != outputTypes.end()) {
568 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
571 //set sabund file as new current sabundfile
572 itTypes = outputTypes.find("sabund");
573 if (itTypes != outputTypes.end()) {
574 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
578 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
582 catch(exception& e) {
583 m->errorOut(e, "MGClusterCommand", "execute");
587 //**********************************************************************************************************************
588 void MGClusterCommand::printData(ListVector* mergedList){
590 mergedList->print(listFile);
591 mergedList->getRAbundVector().print(rabundFile);
593 SAbundVector sabund = mergedList->getSAbundVector();
596 sabund.print(sabundFile);
598 catch(exception& e) {
599 m->errorOut(e, "MGClusterCommand", "printData");
603 //**********************************************************************************************************************
604 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
605 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
606 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
608 //create new listvector so you don't overwrite the clustering
609 ListVector* newList = new ListVector(oldList);
615 if (hclusterWanted) {
616 m->openInputFile(overlapFile, inOverlap);
617 if (inOverlap.eof()) { done = true; }
618 }else { if (overlapMatrix.size() == 0) { done = true; } }
621 if (m->control_pressed) {
622 if (hclusterWanted) { inOverlap.close(); }
628 if (!hclusterWanted) {
629 if (count < overlapMatrix.size()) { //do we have another node in the matrix
630 overlapNode = overlapMatrix[count];
634 if (!inOverlap.eof()) {
635 string firstName, secondName;
636 float overlapDistance;
637 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
639 //commented out because we check this in readblast already
640 //map<string,int>::iterator itA = nameMap->find(firstName);
641 //map<string,int>::iterator itB = nameMap->find(secondName);
642 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
643 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
645 //overlapNode.seq1 = itA->second;
646 //overlapNode.seq2 = itB->second;
647 overlapNode.seq1 = nameMap->get(firstName);
648 overlapNode.seq2 = nameMap->get(secondName);
649 overlapNode.dist = overlapDistance;
650 }else { inOverlap.close(); break; }
653 if (overlapNode.dist < dist) {
654 //get names of seqs that overlap
655 string name1 = nameMap->get(overlapNode.seq1);
656 string name2 = nameMap->get(overlapNode.seq2);
658 //use binInfo to find out if they are already in the same bin
659 //map<string, int>::iterator itBin1 = binInfo.find(name1);
660 //map<string, int>::iterator itBin2 = binInfo.find(name2);
662 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
663 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
665 //int binKeep = itBin1->second;
666 //int binRemove = itBin2->second;
668 int binKeep = binInfo[name1];
669 int binRemove = binInfo[name2];
671 //if not merge bins and update binInfo
672 if(binKeep != binRemove) {
674 //save names in old bin
675 string names = newList->get(binRemove);
677 //merge bins into name1s bin
678 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
679 newList->set(binRemove, "");
682 while (names.find_first_of(',') != -1) {
684 string name = names.substr(0,names.find_first_of(','));
685 //save name and bin number
686 binInfo[name] = binKeep;
687 names = names.substr(names.find_first_of(',')+1, names.length());
691 binInfo[names] = binKeep;
694 }else { done = true; }
701 catch(exception& e) {
702 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
706 //**********************************************************************************************************************
707 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
710 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
711 m->mothurRemove(unsortedDist); //delete unsorted file
712 distFile = sortedDistFile;
715 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
716 m->mothurRemove(unsortedOverlap); //delete unsorted file
717 overlapFile = sortedOverlapFile;
719 catch(exception& e) {
720 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
725 //**********************************************************************************************************************
727 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
729 //vector<string> names = ct.getNamesOfSeqs();
731 //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
734 for(int i = 0; i < list->getNumBins(); i++) {
735 vector<string> binNames;
736 string bin = list->get(i);
737 m->splitAtComma(bin, binNames);
739 for (int j = 0; j < binNames.size(); j++) {
740 total += ct->getNumSeqs(binNames[j]);
742 rabund->push_back(total);
747 catch(exception& e) {
748 m->errorOut(e, "MGClusterCommand", "createRabund");
754 //**********************************************************************************************************************