5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
18 CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputPattern(string type) {
69 if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
70 else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; }
71 else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; }
72 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
77 m->errorOut(e, "MGClusterCommand", "getOutputPattern");
81 //*******************************************************************************************************************
82 MGClusterCommand::MGClusterCommand(){
84 abort = true; calledHelp = true;
86 vector<string> tempOutNames;
87 outputTypes["list"] = tempOutNames;
88 outputTypes["rabund"] = tempOutNames;
89 outputTypes["sabund"] = tempOutNames;
92 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
96 //**********************************************************************************************************************
97 MGClusterCommand::MGClusterCommand(string option) {
99 abort = false; calledHelp = false;
101 //allow user to run help
102 if(option == "help") { help(); abort = true; calledHelp = true; }
103 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106 vector<string> myArray = setParameters();
108 OptionParser parser(option);
109 map<string, string> parameters = parser.getParameters();
111 ValidParameters validParameter;
112 map<string,string>::iterator it;
114 //check to make sure all parameters are valid for command
115 for (it = parameters.begin(); it != parameters.end(); it++) {
116 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
119 //initialize outputTypes
120 vector<string> tempOutNames;
121 outputTypes["list"] = tempOutNames;
122 outputTypes["rabund"] = tempOutNames;
123 outputTypes["sabund"] = tempOutNames;
125 //if the user changes the input directory command factory will send this info to us in the output parameter
126 string inputDir = validParameter.validFile(parameters, "inputdir", false);
127 if (inputDir == "not found"){ inputDir = ""; }
130 it = parameters.find("blast");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["blast"] = inputDir + it->second; }
138 it = parameters.find("name");
139 //user has given a template file
140 if(it != parameters.end()){
141 path = m->hasPath(it->second);
142 //if the user has not given a path then, add inputdir. else leave path alone.
143 if (path == "") { parameters["name"] = inputDir + it->second; }
146 it = parameters.find("count");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["count"] = inputDir + it->second; }
156 //check for required parameters
157 blastfile = validParameter.validFile(parameters, "blast", true);
158 if (blastfile == "not open") { blastfile = ""; abort = true; }
159 else if (blastfile == "not found") { blastfile = ""; }
161 //if the user changes the output directory command factory will send this info to us in the output parameter
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
164 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
167 namefile = validParameter.validFile(parameters, "name", true);
168 if (namefile == "not open") { abort = true; }
169 else if (namefile == "not found") { namefile = ""; }
170 else { m->setNameFile(namefile); }
172 countfile = validParameter.validFile(parameters, "count", true);
173 if (countfile == "not open") { abort = true; }
174 else if (countfile == "not found") { countfile = ""; }
175 else { m->setCountTableFile(countfile); }
177 if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
179 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
181 //check for optional parameter and set defaults
183 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
184 precisionLength = temp.length();
185 m->mothurConvert(temp, precision);
187 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
188 m->mothurConvert(temp, cutoff);
189 cutoff += (5 / (precision * 10.0));
191 method = validParameter.validFile(parameters, "method", false);
192 if (method == "not found") { method = "average"; }
194 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
195 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
197 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
198 m->mothurConvert(temp, length);
200 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
201 m->mothurConvert(temp, penalty);
203 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
204 minWanted = m->isTrue(temp);
206 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
207 merge = m->isTrue(temp);
209 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
210 hclusterWanted = m->isTrue(temp);
212 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
213 hard = m->isTrue(temp);
217 catch(exception& e) {
218 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
222 //**********************************************************************************************************************
223 int MGClusterCommand::execute(){
225 if (abort == true) { if (calledHelp) { return 0; } return 2; }
228 if (namefile != "") {
229 nameMap = new NameAssignment(namefile);
231 }else{ nameMap= new NameAssignment(); }
233 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
236 float previousDist = 0.00000;
237 float rndPreviousDist = 0.00000;
239 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
240 //must remember to delete those objects here since readBlast does not
241 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
244 list = new ListVector(nameMap->getListVector());
245 RAbundVector* rabund = NULL;
247 if(countfile != "") {
248 //map<string, int> nameMapCounts = m->readNames(namefile);
249 ct = new CountTable();
250 ct->readTable(countfile);
251 rabund = new RAbundVector();
252 createRabund(ct, list, rabund);
254 rabund = new RAbundVector(list->getRAbundVector());
258 //list = new ListVector(nameMap->getListVector());
259 //rabund = new RAbundVector(list->getRAbundVector());
261 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
265 map<string, int> Seq2Bin;
266 map<string, int> oldSeq2Bin;
268 if (method == "furthest") { tag = "fn"; }
269 else if (method == "nearest") { tag = "nn"; }
272 map<string, string> variables;
273 variables["[filename]"] = fileroot;
274 if (countfile != "") { variables["[tag2]"] = "unique_list"; }
275 variables["[clustertag]"] = tag;
276 string sabundFileName = getOutputFileName("sabund", variables);
277 string rabundFileName = getOutputFileName("rabund", variables);
278 string listFileName = getOutputFileName("list", variables);
280 if (countfile == "") {
281 m->openOutputFile(sabundFileName, sabundFile);
282 m->openOutputFile(rabundFileName, rabundFile);
284 m->openOutputFile(listFileName, listFile);
286 if (m->control_pressed) {
287 delete nameMap; delete read; delete list; delete rabund;
288 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
293 double saveCutoff = cutoff;
295 if (!hclusterWanted) {
296 //get distmatrix and overlap
297 SparseDistanceMatrix* distMatrix = read->getDistMatrix();
298 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
302 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
303 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
304 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
305 cluster->setMapWanted(true);
306 Seq2Bin = cluster->getSeqtoBin();
307 oldSeq2Bin = Seq2Bin;
309 if (m->control_pressed) {
310 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
311 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
316 //cluster using cluster classes
317 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
319 cluster->update(cutoff);
321 if (m->control_pressed) {
322 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
323 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
328 float dist = distMatrix->getSmallDist();
331 rndDist = m->ceilDist(dist, precision);
333 rndDist = m->roundDist(dist, precision);
336 if(previousDist <= 0.0000 && dist != previousDist){
337 oldList.setLabel("unique");
340 else if(rndDist != rndPreviousDist){
342 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
344 if (m->control_pressed) {
345 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
346 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
351 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
355 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
361 rndPreviousDist = rndDist;
363 Seq2Bin = cluster->getSeqtoBin();
364 oldSeq2Bin = Seq2Bin;
367 if(previousDist <= 0.0000){
368 oldList.setLabel("unique");
371 else if(rndPreviousDist<cutoff){
373 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
375 if (m->control_pressed) {
376 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
377 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
382 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
386 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
392 overlapMatrix.clear();
396 }else { //use hcluster to cluster
397 //get distmatrix and overlap
398 overlapFile = read->getOverlapFile();
399 distFile = read->getDistFile();
402 //sort the distance and overlap files
403 sortHclusterFiles(distFile, overlapFile);
405 if (m->control_pressed) {
406 delete nameMap; delete list; delete rabund;
407 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
413 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
414 hcluster->setMapWanted(true);
415 Seq2Bin = cluster->getSeqtoBin();
416 oldSeq2Bin = Seq2Bin;
418 vector<seqDist> seqs; seqs.resize(1); // to start loop
419 //ifstream inHcluster;
420 //m->openInputFile(distFile, inHcluster);
422 if (m->control_pressed) {
423 delete nameMap; delete list; delete rabund; delete hcluster;
424 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
429 while (seqs.size() != 0){
431 seqs = hcluster->getSeqs();
433 //to account for cutoff change in average neighbor
434 if (seqs.size() != 0) {
435 if (seqs[0].dist > cutoff) { break; }
438 if (m->control_pressed) {
439 delete nameMap; delete list; delete rabund; delete hcluster;
440 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
441 m->mothurRemove(distFile);
442 m->mothurRemove(overlapFile);
447 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
449 if (seqs[i].seq1 != seqs[i].seq2) {
451 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
453 if (m->control_pressed) {
454 delete nameMap; delete list; delete rabund; delete hcluster;
455 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
456 m->mothurRemove(distFile);
457 m->mothurRemove(overlapFile);
464 rndDist = m->ceilDist(seqs[i].dist, precision);
466 rndDist = m->roundDist(seqs[i].dist, precision);
469 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
470 oldList.setLabel("unique");
473 else if((rndDist != rndPreviousDist)){
475 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
477 if (m->control_pressed) {
478 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
479 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
480 m->mothurRemove(distFile);
481 m->mothurRemove(overlapFile);
486 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
490 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
495 previousDist = seqs[i].dist;
496 rndPreviousDist = rndDist;
498 Seq2Bin = cluster->getSeqtoBin();
499 oldSeq2Bin = Seq2Bin;
503 //inHcluster.close();
505 if(previousDist <= 0.0000){
506 oldList.setLabel("unique");
509 else if(rndPreviousDist<cutoff){
511 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
513 if (m->control_pressed) {
514 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
515 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
516 m->mothurRemove(distFile);
517 m->mothurRemove(overlapFile);
522 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
526 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
532 m->mothurRemove(distFile);
533 m->mothurRemove(overlapFile);
539 if (countfile == "") {
543 if (m->control_pressed) {
545 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
550 m->mothurOutEndLine();
551 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
552 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
553 if (countfile == "") {
554 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
555 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
557 m->mothurOutEndLine();
559 if (saveCutoff != cutoff) {
560 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
561 else { saveCutoff = m->roundDist(saveCutoff, precision); }
563 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
566 //set list file as new current listfile
568 itTypes = outputTypes.find("list");
569 if (itTypes != outputTypes.end()) {
570 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
573 //set rabund file as new current rabundfile
574 itTypes = outputTypes.find("rabund");
575 if (itTypes != outputTypes.end()) {
576 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
579 //set sabund file as new current sabundfile
580 itTypes = outputTypes.find("sabund");
581 if (itTypes != outputTypes.end()) {
582 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
586 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
590 catch(exception& e) {
591 m->errorOut(e, "MGClusterCommand", "execute");
595 //**********************************************************************************************************************
596 void MGClusterCommand::printData(ListVector* mergedList){
598 mergedList->print(listFile);
599 SAbundVector sabund = mergedList->getSAbundVector();
601 if (countfile == "") {
602 mergedList->getRAbundVector().print(rabundFile);
603 sabund.print(sabundFile);
608 catch(exception& e) {
609 m->errorOut(e, "MGClusterCommand", "printData");
613 //**********************************************************************************************************************
614 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
615 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
616 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
618 //create new listvector so you don't overwrite the clustering
619 ListVector* newList = new ListVector(oldList);
625 if (hclusterWanted) {
626 m->openInputFile(overlapFile, inOverlap);
627 if (inOverlap.eof()) { done = true; }
628 }else { if (overlapMatrix.size() == 0) { done = true; } }
631 if (m->control_pressed) {
632 if (hclusterWanted) { inOverlap.close(); }
638 if (!hclusterWanted) {
639 if (count < overlapMatrix.size()) { //do we have another node in the matrix
640 overlapNode = overlapMatrix[count];
644 if (!inOverlap.eof()) {
645 string firstName, secondName;
646 float overlapDistance;
647 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
649 //commented out because we check this in readblast already
650 //map<string,int>::iterator itA = nameMap->find(firstName);
651 //map<string,int>::iterator itB = nameMap->find(secondName);
652 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
653 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
655 //overlapNode.seq1 = itA->second;
656 //overlapNode.seq2 = itB->second;
657 overlapNode.seq1 = nameMap->get(firstName);
658 overlapNode.seq2 = nameMap->get(secondName);
659 overlapNode.dist = overlapDistance;
660 }else { inOverlap.close(); break; }
663 if (overlapNode.dist < dist) {
664 //get names of seqs that overlap
665 string name1 = nameMap->get(overlapNode.seq1);
666 string name2 = nameMap->get(overlapNode.seq2);
668 //use binInfo to find out if they are already in the same bin
669 //map<string, int>::iterator itBin1 = binInfo.find(name1);
670 //map<string, int>::iterator itBin2 = binInfo.find(name2);
672 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
673 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
675 //int binKeep = itBin1->second;
676 //int binRemove = itBin2->second;
678 int binKeep = binInfo[name1];
679 int binRemove = binInfo[name2];
681 //if not merge bins and update binInfo
682 if(binKeep != binRemove) {
684 //save names in old bin
685 string names = newList->get(binRemove);
687 //merge bins into name1s bin
688 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
689 newList->set(binRemove, "");
692 while (names.find_first_of(',') != -1) {
694 string name = names.substr(0,names.find_first_of(','));
695 //save name and bin number
696 binInfo[name] = binKeep;
697 names = names.substr(names.find_first_of(',')+1, names.length());
701 binInfo[names] = binKeep;
704 }else { done = true; }
711 catch(exception& e) {
712 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
716 //**********************************************************************************************************************
717 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
720 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
721 m->mothurRemove(unsortedDist); //delete unsorted file
722 distFile = sortedDistFile;
725 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
726 m->mothurRemove(unsortedOverlap); //delete unsorted file
727 overlapFile = sortedOverlapFile;
729 catch(exception& e) {
730 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
735 //**********************************************************************************************************************
737 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
739 //vector<string> names = ct.getNamesOfSeqs();
741 //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
744 for(int i = 0; i < list->getNumBins(); i++) {
745 vector<string> binNames;
746 string bin = list->get(i);
747 m->splitAtComma(bin, binNames);
749 for (int j = 0; j < binNames.size(); j++) {
750 total += ct->getNumSeqs(binNames[j]);
752 rabund->push_back(total);
757 catch(exception& e) {
758 m->errorOut(e, "MGClusterCommand", "createRabund");
764 //**********************************************************************************************************************