5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
18 CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputPattern(string type) {
69 if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
70 else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; }
71 else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; }
72 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
77 m->errorOut(e, "MGClusterCommand", "getOutputPattern");
81 //*******************************************************************************************************************
82 MGClusterCommand::MGClusterCommand(){
84 abort = true; calledHelp = true;
86 vector<string> tempOutNames;
87 outputTypes["list"] = tempOutNames;
88 outputTypes["rabund"] = tempOutNames;
89 outputTypes["sabund"] = tempOutNames;
92 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
96 //**********************************************************************************************************************
97 MGClusterCommand::MGClusterCommand(string option) {
99 abort = false; calledHelp = false;
101 //allow user to run help
102 if(option == "help") { help(); abort = true; calledHelp = true; }
103 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106 vector<string> myArray = setParameters();
108 OptionParser parser(option);
109 map<string, string> parameters = parser.getParameters();
111 ValidParameters validParameter;
112 map<string,string>::iterator it;
114 //check to make sure all parameters are valid for command
115 for (it = parameters.begin(); it != parameters.end(); it++) {
116 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
119 //initialize outputTypes
120 vector<string> tempOutNames;
121 outputTypes["list"] = tempOutNames;
122 outputTypes["rabund"] = tempOutNames;
123 outputTypes["sabund"] = tempOutNames;
125 //if the user changes the input directory command factory will send this info to us in the output parameter
126 string inputDir = validParameter.validFile(parameters, "inputdir", false);
127 if (inputDir == "not found"){ inputDir = ""; }
130 it = parameters.find("blast");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["blast"] = inputDir + it->second; }
138 it = parameters.find("name");
139 //user has given a template file
140 if(it != parameters.end()){
141 path = m->hasPath(it->second);
142 //if the user has not given a path then, add inputdir. else leave path alone.
143 if (path == "") { parameters["name"] = inputDir + it->second; }
146 it = parameters.find("count");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["count"] = inputDir + it->second; }
156 //check for required parameters
157 blastfile = validParameter.validFile(parameters, "blast", true);
158 if (blastfile == "not open") { blastfile = ""; abort = true; }
159 else if (blastfile == "not found") { blastfile = ""; }
161 //if the user changes the output directory command factory will send this info to us in the output parameter
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
164 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
167 namefile = validParameter.validFile(parameters, "name", true);
168 if (namefile == "not open") { abort = true; }
169 else if (namefile == "not found") { namefile = ""; }
170 else { m->setNameFile(namefile); }
172 countfile = validParameter.validFile(parameters, "count", true);
173 if (countfile == "not open") { abort = true; }
174 else if (countfile == "not found") { countfile = ""; }
175 else { m->setCountTableFile(countfile); }
177 if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
179 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
181 //check for optional parameter and set defaults
183 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
184 precisionLength = temp.length();
185 m->mothurConvert(temp, precision);
187 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
188 m->mothurConvert(temp, cutoff);
189 cutoff += (5 / (precision * 10.0));
191 method = validParameter.validFile(parameters, "method", false);
192 if (method == "not found") { method = "average"; }
194 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
195 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
197 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
198 m->mothurConvert(temp, length);
200 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
201 m->mothurConvert(temp, penalty);
203 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
204 minWanted = m->isTrue(temp);
206 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
207 merge = m->isTrue(temp);
209 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
210 hclusterWanted = m->isTrue(temp);
212 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
213 hard = m->isTrue(temp);
217 catch(exception& e) {
218 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
222 //**********************************************************************************************************************
223 int MGClusterCommand::execute(){
225 if (abort == true) { if (calledHelp) { return 0; } return 2; }
228 if (namefile != "") {
229 nameMap = new NameAssignment(namefile);
231 }else{ nameMap= new NameAssignment(); }
233 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
236 float previousDist = 0.00000;
237 float rndPreviousDist = 0.00000;
239 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
240 //must remember to delete those objects here since readBlast does not
241 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
244 list = new ListVector(nameMap->getListVector());
245 RAbundVector* rabund = NULL;
247 if(countfile != "") {
248 //map<string, int> nameMapCounts = m->readNames(namefile);
249 ct = new CountTable();
250 ct->readTable(countfile, false);
251 rabund = new RAbundVector();
252 createRabund(ct, list, rabund);
254 rabund = new RAbundVector(list->getRAbundVector());
258 //list = new ListVector(nameMap->getListVector());
259 //rabund = new RAbundVector(list->getRAbundVector());
261 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
265 map<string, int> Seq2Bin;
266 map<string, int> oldSeq2Bin;
268 if (method == "furthest") { tag = "fn"; }
269 else if (method == "nearest") { tag = "nn"; }
272 map<string, string> variables;
273 variables["[filename]"] = fileroot;
274 variables["[clustertag]"] = tag;
275 string sabundFileName = getOutputFileName("sabund", variables);
276 string rabundFileName = getOutputFileName("rabund", variables);
277 if (countfile != "") { variables["[tag2]"] = "unique_list"; }
278 string listFileName = getOutputFileName("list", variables);
280 if (countfile == "") {
281 m->openOutputFile(sabundFileName, sabundFile);
282 m->openOutputFile(rabundFileName, rabundFile);
284 m->openOutputFile(listFileName, listFile);
286 if (m->control_pressed) {
287 delete nameMap; delete read; delete list; delete rabund;
288 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
293 double saveCutoff = cutoff;
295 if (!hclusterWanted) {
296 //get distmatrix and overlap
297 SparseDistanceMatrix* distMatrix = read->getDistMatrix();
298 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
302 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
303 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
304 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
305 cluster->setMapWanted(true);
306 Seq2Bin = cluster->getSeqtoBin();
307 oldSeq2Bin = Seq2Bin;
309 if (m->control_pressed) {
310 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
311 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
317 //cluster using cluster classes
318 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
320 if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
322 cluster->update(cutoff);
324 if (m->control_pressed) {
325 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
326 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
331 float dist = distMatrix->getSmallDist();
334 rndDist = m->ceilDist(dist, precision);
336 rndDist = m->roundDist(dist, precision);
339 if(previousDist <= 0.0000 && dist != previousDist){
340 oldList.setLabel("unique");
343 else if(rndDist != rndPreviousDist){
345 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
347 if (m->control_pressed) {
348 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
349 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
354 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
358 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
364 rndPreviousDist = rndDist;
366 Seq2Bin = cluster->getSeqtoBin();
367 oldSeq2Bin = Seq2Bin;
370 if(previousDist <= 0.0000){
371 oldList.setLabel("unique");
374 else if(rndPreviousDist<cutoff){
376 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
378 if (m->control_pressed) {
379 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
380 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
385 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
389 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
395 overlapMatrix.clear();
399 }else { //use hcluster to cluster
400 //get distmatrix and overlap
401 overlapFile = read->getOverlapFile();
402 distFile = read->getDistFile();
405 //sort the distance and overlap files
406 sortHclusterFiles(distFile, overlapFile);
408 if (m->control_pressed) {
409 delete nameMap; delete list; delete rabund;
410 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
416 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
417 hcluster->setMapWanted(true);
418 Seq2Bin = cluster->getSeqtoBin();
419 oldSeq2Bin = Seq2Bin;
421 vector<seqDist> seqs; seqs.resize(1); // to start loop
422 //ifstream inHcluster;
423 //m->openInputFile(distFile, inHcluster);
425 if (m->control_pressed) {
426 delete nameMap; delete list; delete rabund; delete hcluster;
427 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
432 while (seqs.size() != 0){
434 seqs = hcluster->getSeqs();
436 //to account for cutoff change in average neighbor
437 if (seqs.size() != 0) {
438 if (seqs[0].dist > cutoff) { break; }
441 if (m->control_pressed) {
442 delete nameMap; delete list; delete rabund; delete hcluster;
443 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
444 m->mothurRemove(distFile);
445 m->mothurRemove(overlapFile);
450 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
452 if (seqs[i].seq1 != seqs[i].seq2) {
454 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
456 if (m->control_pressed) {
457 delete nameMap; delete list; delete rabund; delete hcluster;
458 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
459 m->mothurRemove(distFile);
460 m->mothurRemove(overlapFile);
467 rndDist = m->ceilDist(seqs[i].dist, precision);
469 rndDist = m->roundDist(seqs[i].dist, precision);
472 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
473 oldList.setLabel("unique");
476 else if((rndDist != rndPreviousDist)){
478 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
480 if (m->control_pressed) {
481 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
482 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
483 m->mothurRemove(distFile);
484 m->mothurRemove(overlapFile);
489 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
493 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
498 previousDist = seqs[i].dist;
499 rndPreviousDist = rndDist;
501 Seq2Bin = cluster->getSeqtoBin();
502 oldSeq2Bin = Seq2Bin;
506 //inHcluster.close();
508 if(previousDist <= 0.0000){
509 oldList.setLabel("unique");
512 else if(rndPreviousDist<cutoff){
514 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
516 if (m->control_pressed) {
517 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
518 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
519 m->mothurRemove(distFile);
520 m->mothurRemove(overlapFile);
525 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
529 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
535 m->mothurRemove(distFile);
536 m->mothurRemove(overlapFile);
542 if (countfile == "") {
546 if (m->control_pressed) {
548 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
553 m->mothurOutEndLine();
554 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
555 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
556 if (countfile == "") {
557 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
558 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
560 m->mothurOutEndLine();
562 if (saveCutoff != cutoff) {
563 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
564 else { saveCutoff = m->roundDist(saveCutoff, precision); }
566 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
569 //set list file as new current listfile
571 itTypes = outputTypes.find("list");
572 if (itTypes != outputTypes.end()) {
573 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
576 //set rabund file as new current rabundfile
577 itTypes = outputTypes.find("rabund");
578 if (itTypes != outputTypes.end()) {
579 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
582 //set sabund file as new current sabundfile
583 itTypes = outputTypes.find("sabund");
584 if (itTypes != outputTypes.end()) {
585 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
589 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
593 catch(exception& e) {
594 m->errorOut(e, "MGClusterCommand", "execute");
598 //**********************************************************************************************************************
599 void MGClusterCommand::printData(ListVector* mergedList){
601 mergedList->print(listFile);
602 SAbundVector sabund = mergedList->getSAbundVector();
604 if (countfile == "") {
605 mergedList->getRAbundVector().print(rabundFile);
606 sabund.print(sabundFile);
611 catch(exception& e) {
612 m->errorOut(e, "MGClusterCommand", "printData");
616 //**********************************************************************************************************************
617 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
618 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
619 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
621 //create new listvector so you don't overwrite the clustering
622 ListVector* newList = new ListVector(oldList);
628 if (hclusterWanted) {
629 m->openInputFile(overlapFile, inOverlap);
630 if (inOverlap.eof()) { done = true; }
631 }else { if (overlapMatrix.size() == 0) { done = true; } }
634 if (m->control_pressed) {
635 if (hclusterWanted) { inOverlap.close(); }
641 if (!hclusterWanted) {
642 if (count < overlapMatrix.size()) { //do we have another node in the matrix
643 overlapNode = overlapMatrix[count];
647 if (!inOverlap.eof()) {
648 string firstName, secondName;
649 float overlapDistance;
650 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
652 //commented out because we check this in readblast already
653 //map<string,int>::iterator itA = nameMap->find(firstName);
654 //map<string,int>::iterator itB = nameMap->find(secondName);
655 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
656 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
658 //overlapNode.seq1 = itA->second;
659 //overlapNode.seq2 = itB->second;
660 overlapNode.seq1 = nameMap->get(firstName);
661 overlapNode.seq2 = nameMap->get(secondName);
662 overlapNode.dist = overlapDistance;
663 }else { inOverlap.close(); break; }
666 if (overlapNode.dist < dist) {
667 //get names of seqs that overlap
668 string name1 = nameMap->get(overlapNode.seq1);
669 string name2 = nameMap->get(overlapNode.seq2);
671 //use binInfo to find out if they are already in the same bin
672 //map<string, int>::iterator itBin1 = binInfo.find(name1);
673 //map<string, int>::iterator itBin2 = binInfo.find(name2);
675 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
676 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
678 //int binKeep = itBin1->second;
679 //int binRemove = itBin2->second;
681 int binKeep = binInfo[name1];
682 int binRemove = binInfo[name2];
684 //if not merge bins and update binInfo
685 if(binKeep != binRemove) {
687 //save names in old bin
688 string names = newList->get(binRemove);
690 //merge bins into name1s bin
691 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
692 newList->set(binRemove, "");
695 while (names.find_first_of(',') != -1) {
697 string name = names.substr(0,names.find_first_of(','));
698 //save name and bin number
699 binInfo[name] = binKeep;
700 names = names.substr(names.find_first_of(',')+1, names.length());
704 binInfo[names] = binKeep;
707 }else { done = true; }
714 catch(exception& e) {
715 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
719 //**********************************************************************************************************************
720 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
723 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
724 m->mothurRemove(unsortedDist); //delete unsorted file
725 distFile = sortedDistFile;
728 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
729 m->mothurRemove(unsortedOverlap); //delete unsorted file
730 overlapFile = sortedOverlapFile;
732 catch(exception& e) {
733 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
738 //**********************************************************************************************************************
740 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
742 //vector<string> names = ct.getNamesOfSeqs();
744 //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
747 for(int i = 0; i < list->getNumBins(); i++) {
748 vector<string> binNames;
749 string bin = list->get(i);
750 m->splitAtComma(bin, binNames);
752 for (int j = 0; j < binNames.size(); j++) {
753 total += ct->getNumSeqs(binNames[j]);
755 rabund->push_back(total);
760 catch(exception& e) {
761 m->errorOut(e, "MGClusterCommand", "createRabund");
767 //**********************************************************************************************************************