5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
18 CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputPattern(string type) {
69 if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
70 else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; }
71 else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; }
72 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
77 m->errorOut(e, "MGClusterCommand", "getOutputPattern");
81 //*******************************************************************************************************************
82 MGClusterCommand::MGClusterCommand(){
84 abort = true; calledHelp = true;
86 vector<string> tempOutNames;
87 outputTypes["list"] = tempOutNames;
88 outputTypes["rabund"] = tempOutNames;
89 outputTypes["sabund"] = tempOutNames;
92 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
96 //**********************************************************************************************************************
97 MGClusterCommand::MGClusterCommand(string option) {
99 abort = false; calledHelp = false;
101 //allow user to run help
102 if(option == "help") { help(); abort = true; calledHelp = true; }
103 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106 vector<string> myArray = setParameters();
108 OptionParser parser(option);
109 map<string, string> parameters = parser.getParameters();
111 ValidParameters validParameter;
112 map<string,string>::iterator it;
114 //check to make sure all parameters are valid for command
115 for (it = parameters.begin(); it != parameters.end(); it++) {
116 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
119 //initialize outputTypes
120 vector<string> tempOutNames;
121 outputTypes["list"] = tempOutNames;
122 outputTypes["rabund"] = tempOutNames;
123 outputTypes["sabund"] = tempOutNames;
125 //if the user changes the input directory command factory will send this info to us in the output parameter
126 string inputDir = validParameter.validFile(parameters, "inputdir", false);
127 if (inputDir == "not found"){ inputDir = ""; }
130 it = parameters.find("blast");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["blast"] = inputDir + it->second; }
138 it = parameters.find("name");
139 //user has given a template file
140 if(it != parameters.end()){
141 path = m->hasPath(it->second);
142 //if the user has not given a path then, add inputdir. else leave path alone.
143 if (path == "") { parameters["name"] = inputDir + it->second; }
146 it = parameters.find("count");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["count"] = inputDir + it->second; }
156 //check for required parameters
157 blastfile = validParameter.validFile(parameters, "blast", true);
158 if (blastfile == "not open") { blastfile = ""; abort = true; }
159 else if (blastfile == "not found") { blastfile = ""; }
161 //if the user changes the output directory command factory will send this info to us in the output parameter
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
164 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
167 namefile = validParameter.validFile(parameters, "name", true);
168 if (namefile == "not open") { abort = true; }
169 else if (namefile == "not found") { namefile = ""; }
170 else { m->setNameFile(namefile); }
172 countfile = validParameter.validFile(parameters, "count", true);
173 if (countfile == "not open") { abort = true; }
174 else if (countfile == "not found") { countfile = ""; }
175 else { m->setCountTableFile(countfile); }
177 if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
179 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
181 //check for optional parameter and set defaults
183 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
184 precisionLength = temp.length();
185 m->mothurConvert(temp, precision);
187 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
188 m->mothurConvert(temp, cutoff);
189 cutoff += (5 / (precision * 10.0));
191 method = validParameter.validFile(parameters, "method", false);
192 if (method == "not found") { method = "average"; }
194 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
195 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
197 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
198 m->mothurConvert(temp, length);
200 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
201 m->mothurConvert(temp, penalty);
203 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
204 minWanted = m->isTrue(temp);
206 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
207 merge = m->isTrue(temp);
209 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
210 hclusterWanted = m->isTrue(temp);
212 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
213 hard = m->isTrue(temp);
217 catch(exception& e) {
218 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
222 //**********************************************************************************************************************
223 int MGClusterCommand::execute(){
225 if (abort == true) { if (calledHelp) { return 0; } return 2; }
228 if (namefile != "") {
229 nameMap = new NameAssignment(namefile);
231 }else if (countfile != "") {
232 ct = new CountTable();
233 ct->readTable(countfile, false);
234 nameMap= new NameAssignment();
235 vector<string> tempNames = ct->getNamesOfSeqs();
236 for (int i = 0; i < tempNames.size(); i++) { nameMap->push_back(tempNames[i]); }
237 }else{ nameMap= new NameAssignment(); }
239 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
242 float previousDist = 0.00000;
243 float rndPreviousDist = 0.00000;
245 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
246 //must remember to delete those objects here since readBlast does not
247 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
250 list = new ListVector(nameMap->getListVector());
251 RAbundVector* rabund = NULL;
253 if(countfile != "") {
254 rabund = new RAbundVector();
255 createRabund(ct, list, rabund);
257 rabund = new RAbundVector(list->getRAbundVector());
261 //list = new ListVector(nameMap->getListVector());
262 //rabund = new RAbundVector(list->getRAbundVector());
264 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
268 map<string, int> Seq2Bin;
269 map<string, int> oldSeq2Bin;
271 if (method == "furthest") { tag = "fn"; }
272 else if (method == "nearest") { tag = "nn"; }
275 map<string, string> variables;
276 variables["[filename]"] = fileroot;
277 variables["[clustertag]"] = tag;
278 string sabundFileName = getOutputFileName("sabund", variables);
279 string rabundFileName = getOutputFileName("rabund", variables);
280 if (countfile != "") { variables["[tag2]"] = "unique_list"; }
281 string listFileName = getOutputFileName("list", variables);
283 if (countfile == "") {
284 m->openOutputFile(sabundFileName, sabundFile);
285 m->openOutputFile(rabundFileName, rabundFile);
287 m->openOutputFile(listFileName, listFile);
289 if (m->control_pressed) {
290 delete nameMap; delete read; delete list; delete rabund;
291 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
296 double saveCutoff = cutoff;
298 if (!hclusterWanted) {
299 //get distmatrix and overlap
300 SparseDistanceMatrix* distMatrix = read->getDistMatrix();
301 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
305 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
306 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
307 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
308 cluster->setMapWanted(true);
309 Seq2Bin = cluster->getSeqtoBin();
310 oldSeq2Bin = Seq2Bin;
312 if (m->control_pressed) {
313 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
314 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
320 //cluster using cluster classes
321 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
323 if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
325 cluster->update(cutoff);
327 if (m->control_pressed) {
328 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
329 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
334 float dist = distMatrix->getSmallDist();
337 rndDist = m->ceilDist(dist, precision);
339 rndDist = m->roundDist(dist, precision);
342 if(previousDist <= 0.0000 && dist != previousDist){
343 oldList.setLabel("unique");
346 else if(rndDist != rndPreviousDist){
348 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
350 if (m->control_pressed) {
351 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
352 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
357 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
361 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
367 rndPreviousDist = rndDist;
369 Seq2Bin = cluster->getSeqtoBin();
370 oldSeq2Bin = Seq2Bin;
373 if(previousDist <= 0.0000){
374 oldList.setLabel("unique");
377 else if(rndPreviousDist<cutoff){
379 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
381 if (m->control_pressed) {
382 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
383 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
388 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
392 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
398 overlapMatrix.clear();
402 }else { //use hcluster to cluster
403 //get distmatrix and overlap
404 overlapFile = read->getOverlapFile();
405 distFile = read->getDistFile();
408 //sort the distance and overlap files
409 sortHclusterFiles(distFile, overlapFile);
411 if (m->control_pressed) {
412 delete nameMap; delete list; delete rabund;
413 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
419 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
420 hcluster->setMapWanted(true);
421 Seq2Bin = cluster->getSeqtoBin();
422 oldSeq2Bin = Seq2Bin;
424 vector<seqDist> seqs; seqs.resize(1); // to start loop
425 //ifstream inHcluster;
426 //m->openInputFile(distFile, inHcluster);
428 if (m->control_pressed) {
429 delete nameMap; delete list; delete rabund; delete hcluster;
430 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
435 while (seqs.size() != 0){
437 seqs = hcluster->getSeqs();
439 //to account for cutoff change in average neighbor
440 if (seqs.size() != 0) {
441 if (seqs[0].dist > cutoff) { break; }
444 if (m->control_pressed) {
445 delete nameMap; delete list; delete rabund; delete hcluster;
446 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
447 m->mothurRemove(distFile);
448 m->mothurRemove(overlapFile);
453 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
455 if (seqs[i].seq1 != seqs[i].seq2) {
457 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
459 if (m->control_pressed) {
460 delete nameMap; delete list; delete rabund; delete hcluster;
461 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
462 m->mothurRemove(distFile);
463 m->mothurRemove(overlapFile);
470 rndDist = m->ceilDist(seqs[i].dist, precision);
472 rndDist = m->roundDist(seqs[i].dist, precision);
475 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
476 oldList.setLabel("unique");
479 else if((rndDist != rndPreviousDist)){
481 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
483 if (m->control_pressed) {
484 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
485 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
486 m->mothurRemove(distFile);
487 m->mothurRemove(overlapFile);
492 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
496 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
501 previousDist = seqs[i].dist;
502 rndPreviousDist = rndDist;
504 Seq2Bin = cluster->getSeqtoBin();
505 oldSeq2Bin = Seq2Bin;
509 //inHcluster.close();
511 if(previousDist <= 0.0000){
512 oldList.setLabel("unique");
515 else if(rndPreviousDist<cutoff){
517 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
519 if (m->control_pressed) {
520 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
521 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
522 m->mothurRemove(distFile);
523 m->mothurRemove(overlapFile);
528 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
532 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
538 m->mothurRemove(distFile);
539 m->mothurRemove(overlapFile);
545 if (countfile == "") {
549 if (m->control_pressed) {
551 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
556 m->mothurOutEndLine();
557 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
558 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
559 if (countfile == "") {
560 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
561 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
563 m->mothurOutEndLine();
565 if (saveCutoff != cutoff) {
566 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
567 else { saveCutoff = m->roundDist(saveCutoff, precision); }
569 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
572 //set list file as new current listfile
574 itTypes = outputTypes.find("list");
575 if (itTypes != outputTypes.end()) {
576 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
579 //set rabund file as new current rabundfile
580 itTypes = outputTypes.find("rabund");
581 if (itTypes != outputTypes.end()) {
582 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
585 //set sabund file as new current sabundfile
586 itTypes = outputTypes.find("sabund");
587 if (itTypes != outputTypes.end()) {
588 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
592 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
596 catch(exception& e) {
597 m->errorOut(e, "MGClusterCommand", "execute");
601 //**********************************************************************************************************************
602 void MGClusterCommand::printData(ListVector* mergedList){
604 mergedList->print(listFile);
605 SAbundVector sabund = mergedList->getSAbundVector();
607 if (countfile == "") {
608 mergedList->getRAbundVector().print(rabundFile);
609 sabund.print(sabundFile);
614 catch(exception& e) {
615 m->errorOut(e, "MGClusterCommand", "printData");
619 //**********************************************************************************************************************
620 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
621 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
622 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
624 //create new listvector so you don't overwrite the clustering
625 ListVector* newList = new ListVector(oldList);
631 if (hclusterWanted) {
632 m->openInputFile(overlapFile, inOverlap);
633 if (inOverlap.eof()) { done = true; }
634 }else { if (overlapMatrix.size() == 0) { done = true; } }
637 if (m->control_pressed) {
638 if (hclusterWanted) { inOverlap.close(); }
644 if (!hclusterWanted) {
645 if (count < overlapMatrix.size()) { //do we have another node in the matrix
646 overlapNode = overlapMatrix[count];
650 if (!inOverlap.eof()) {
651 string firstName, secondName;
652 float overlapDistance;
653 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
655 //commented out because we check this in readblast already
656 //map<string,int>::iterator itA = nameMap->find(firstName);
657 //map<string,int>::iterator itB = nameMap->find(secondName);
658 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
659 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
661 //overlapNode.seq1 = itA->second;
662 //overlapNode.seq2 = itB->second;
663 overlapNode.seq1 = nameMap->get(firstName);
664 overlapNode.seq2 = nameMap->get(secondName);
665 overlapNode.dist = overlapDistance;
666 }else { inOverlap.close(); break; }
669 if (overlapNode.dist < dist) {
670 //get names of seqs that overlap
671 string name1 = nameMap->get(overlapNode.seq1);
672 string name2 = nameMap->get(overlapNode.seq2);
674 //use binInfo to find out if they are already in the same bin
675 //map<string, int>::iterator itBin1 = binInfo.find(name1);
676 //map<string, int>::iterator itBin2 = binInfo.find(name2);
678 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
679 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
681 //int binKeep = itBin1->second;
682 //int binRemove = itBin2->second;
684 int binKeep = binInfo[name1];
685 int binRemove = binInfo[name2];
687 //if not merge bins and update binInfo
688 if(binKeep != binRemove) {
690 //save names in old bin
691 string names = newList->get(binRemove);
693 //merge bins into name1s bin
694 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
695 newList->set(binRemove, "");
698 while (names.find_first_of(',') != -1) {
700 string name = names.substr(0,names.find_first_of(','));
701 //save name and bin number
702 binInfo[name] = binKeep;
703 names = names.substr(names.find_first_of(',')+1, names.length());
707 binInfo[names] = binKeep;
710 }else { done = true; }
717 catch(exception& e) {
718 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
722 //**********************************************************************************************************************
723 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
726 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
727 m->mothurRemove(unsortedDist); //delete unsorted file
728 distFile = sortedDistFile;
731 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
732 m->mothurRemove(unsortedOverlap); //delete unsorted file
733 overlapFile = sortedOverlapFile;
735 catch(exception& e) {
736 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
741 //**********************************************************************************************************************
743 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
745 //vector<string> names = ct.getNamesOfSeqs();
747 //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
750 for(int i = 0; i < list->getNumBins(); i++) {
751 vector<string> binNames;
752 string bin = list->get(i);
753 m->splitAtComma(bin, binNames);
755 for (int j = 0; j < binNames.size(); j++) {
756 total += ct->getNumSeqs(binNames[j]);
758 rabund->push_back(total);
763 catch(exception& e) {
764 m->errorOut(e, "MGClusterCommand", "createRabund");
770 //**********************************************************************************************************************