5 * Created by westcott on 12/11/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "mgclustercommand.h"
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){
15 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
17 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount);
18 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "MGClusterCommand", "setParameters");
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47 helpString += "The precision parameter's default value is 100. \n";
48 helpString += "The acceptable mgcluster methods are furthest, nearest and average. If no method is provided then average is assumed.\n";
49 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50 helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
51 helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
52 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
53 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54 helpString += "The mgcluster command should be in the following format: \n";
55 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
60 m->errorOut(e, "MGClusterCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
67 string outputFileName = "";
68 map<string, vector<string> >::iterator it;
70 //is this a type this command creates
71 it = outputTypes.find(type);
72 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74 if (type == "list") { outputFileName = "list"; }
75 else if (type == "rabund") { outputFileName = "rabund"; }
76 else if (type == "sabund") { outputFileName = "sabund"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
79 return outputFileName;
82 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){
89 abort = true; calledHelp = true;
91 vector<string> tempOutNames;
92 outputTypes["list"] = tempOutNames;
93 outputTypes["rabund"] = tempOutNames;
94 outputTypes["sabund"] = tempOutNames;
97 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
104 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111 vector<string> myArray = setParameters();
113 OptionParser parser(option);
114 map<string, string> parameters = parser.getParameters();
116 ValidParameters validParameter;
117 map<string,string>::iterator it;
119 //check to make sure all parameters are valid for command
120 for (it = parameters.begin(); it != parameters.end(); it++) {
121 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
124 //initialize outputTypes
125 vector<string> tempOutNames;
126 outputTypes["list"] = tempOutNames;
127 outputTypes["rabund"] = tempOutNames;
128 outputTypes["sabund"] = tempOutNames;
130 //if the user changes the input directory command factory will send this info to us in the output parameter
131 string inputDir = validParameter.validFile(parameters, "inputdir", false);
132 if (inputDir == "not found"){ inputDir = ""; }
135 it = parameters.find("blast");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["blast"] = inputDir + it->second; }
143 it = parameters.find("name");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["name"] = inputDir + it->second; }
151 it = parameters.find("count");
152 //user has given a template file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["count"] = inputDir + it->second; }
161 //check for required parameters
162 blastfile = validParameter.validFile(parameters, "blast", true);
163 if (blastfile == "not open") { blastfile = ""; abort = true; }
164 else if (blastfile == "not found") { blastfile = ""; }
166 //if the user changes the output directory command factory will send this info to us in the output parameter
167 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
169 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
172 namefile = validParameter.validFile(parameters, "name", true);
173 if (namefile == "not open") { abort = true; }
174 else if (namefile == "not found") { namefile = ""; }
175 else { m->setNameFile(namefile); }
177 countfile = validParameter.validFile(parameters, "count", true);
178 if (countfile == "not open") { abort = true; }
179 else if (countfile == "not found") { countfile = ""; }
180 else { m->setCountTableFile(countfile); }
182 if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
184 if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
186 //check for optional parameter and set defaults
188 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
189 precisionLength = temp.length();
190 m->mothurConvert(temp, precision);
192 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
193 m->mothurConvert(temp, cutoff);
194 cutoff += (5 / (precision * 10.0));
196 method = validParameter.validFile(parameters, "method", false);
197 if (method == "not found") { method = "average"; }
199 if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
200 else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
202 temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
203 m->mothurConvert(temp, length);
205 temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
206 m->mothurConvert(temp, penalty);
208 temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
209 minWanted = m->isTrue(temp);
211 temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
212 merge = m->isTrue(temp);
214 temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
215 hclusterWanted = m->isTrue(temp);
217 temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
218 hard = m->isTrue(temp);
222 catch(exception& e) {
223 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
227 //**********************************************************************************************************************
228 int MGClusterCommand::execute(){
230 if (abort == true) { if (calledHelp) { return 0; } return 2; }
233 if (namefile != "") {
234 nameMap = new NameAssignment(namefile);
236 }else{ nameMap= new NameAssignment(); }
238 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
241 float previousDist = 0.00000;
242 float rndPreviousDist = 0.00000;
244 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
245 //must remember to delete those objects here since readBlast does not
246 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
249 list = new ListVector(nameMap->getListVector());
250 RAbundVector* rabund = NULL;
252 if(countfile != "") {
253 //map<string, int> nameMapCounts = m->readNames(namefile);
254 ct = new CountTable();
255 ct->readTable(countfile);
256 rabund = new RAbundVector();
257 createRabund(ct, list, rabund);
259 rabund = new RAbundVector(list->getRAbundVector());
263 //list = new ListVector(nameMap->getListVector());
264 //rabund = new RAbundVector(list->getRAbundVector());
266 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
270 map<string, int> Seq2Bin;
271 map<string, int> oldSeq2Bin;
273 if (method == "furthest") { tag = "fn"; }
274 else if (method == "nearest") { tag = "nn"; }
277 string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
278 string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
279 string listFileName = fileroot+ tag + ".";
280 if (countfile != "") { listFileName += "unique_"; }
281 listFileName += getOutputFileNameTag("list");
283 if (countfile == "") {
284 m->openOutputFile(sabundFileName, sabundFile);
285 m->openOutputFile(rabundFileName, rabundFile);
287 m->openOutputFile(listFileName, listFile);
289 if (m->control_pressed) {
290 delete nameMap; delete read; delete list; delete rabund;
291 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
296 double saveCutoff = cutoff;
298 if (!hclusterWanted) {
299 //get distmatrix and overlap
300 SparseDistanceMatrix* distMatrix = read->getDistMatrix();
301 overlapMatrix = read->getOverlapMatrix(); //already sorted by read
305 if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
306 else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
307 else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
308 cluster->setMapWanted(true);
309 Seq2Bin = cluster->getSeqtoBin();
310 oldSeq2Bin = Seq2Bin;
312 if (m->control_pressed) {
313 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
314 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
319 //cluster using cluster classes
320 while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
322 cluster->update(cutoff);
324 if (m->control_pressed) {
325 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
326 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
331 float dist = distMatrix->getSmallDist();
334 rndDist = m->ceilDist(dist, precision);
336 rndDist = m->roundDist(dist, precision);
339 if(previousDist <= 0.0000 && dist != previousDist){
340 oldList.setLabel("unique");
343 else if(rndDist != rndPreviousDist){
345 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
347 if (m->control_pressed) {
348 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
349 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
354 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
358 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
364 rndPreviousDist = rndDist;
366 Seq2Bin = cluster->getSeqtoBin();
367 oldSeq2Bin = Seq2Bin;
370 if(previousDist <= 0.0000){
371 oldList.setLabel("unique");
374 else if(rndPreviousDist<cutoff){
376 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
378 if (m->control_pressed) {
379 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
380 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
385 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
389 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
395 overlapMatrix.clear();
399 }else { //use hcluster to cluster
400 //get distmatrix and overlap
401 overlapFile = read->getOverlapFile();
402 distFile = read->getDistFile();
405 //sort the distance and overlap files
406 sortHclusterFiles(distFile, overlapFile);
408 if (m->control_pressed) {
409 delete nameMap; delete list; delete rabund;
410 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
416 hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
417 hcluster->setMapWanted(true);
418 Seq2Bin = cluster->getSeqtoBin();
419 oldSeq2Bin = Seq2Bin;
421 vector<seqDist> seqs; seqs.resize(1); // to start loop
422 //ifstream inHcluster;
423 //m->openInputFile(distFile, inHcluster);
425 if (m->control_pressed) {
426 delete nameMap; delete list; delete rabund; delete hcluster;
427 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
432 while (seqs.size() != 0){
434 seqs = hcluster->getSeqs();
436 //to account for cutoff change in average neighbor
437 if (seqs.size() != 0) {
438 if (seqs[0].dist > cutoff) { break; }
441 if (m->control_pressed) {
442 delete nameMap; delete list; delete rabund; delete hcluster;
443 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
444 m->mothurRemove(distFile);
445 m->mothurRemove(overlapFile);
450 for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
452 if (seqs[i].seq1 != seqs[i].seq2) {
454 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
456 if (m->control_pressed) {
457 delete nameMap; delete list; delete rabund; delete hcluster;
458 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
459 m->mothurRemove(distFile);
460 m->mothurRemove(overlapFile);
467 rndDist = m->ceilDist(seqs[i].dist, precision);
469 rndDist = m->roundDist(seqs[i].dist, precision);
472 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
473 oldList.setLabel("unique");
476 else if((rndDist != rndPreviousDist)){
478 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
480 if (m->control_pressed) {
481 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
482 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
483 m->mothurRemove(distFile);
484 m->mothurRemove(overlapFile);
489 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
493 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
498 previousDist = seqs[i].dist;
499 rndPreviousDist = rndDist;
501 Seq2Bin = cluster->getSeqtoBin();
502 oldSeq2Bin = Seq2Bin;
506 //inHcluster.close();
508 if(previousDist <= 0.0000){
509 oldList.setLabel("unique");
512 else if(rndPreviousDist<cutoff){
514 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
516 if (m->control_pressed) {
517 delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
518 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
519 m->mothurRemove(distFile);
520 m->mothurRemove(overlapFile);
525 temp->setLabel(toString(rndPreviousDist, precisionLength-1));
529 oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
535 m->mothurRemove(distFile);
536 m->mothurRemove(overlapFile);
542 if (countfile == "") {
546 if (m->control_pressed) {
548 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
553 m->mothurOutEndLine();
554 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
555 m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
556 if (countfile == "") {
557 m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
558 m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
560 m->mothurOutEndLine();
562 if (saveCutoff != cutoff) {
563 if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
564 else { saveCutoff = m->roundDist(saveCutoff, precision); }
566 m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
569 //set list file as new current listfile
571 itTypes = outputTypes.find("list");
572 if (itTypes != outputTypes.end()) {
573 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
576 //set rabund file as new current rabundfile
577 itTypes = outputTypes.find("rabund");
578 if (itTypes != outputTypes.end()) {
579 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
582 //set sabund file as new current sabundfile
583 itTypes = outputTypes.find("sabund");
584 if (itTypes != outputTypes.end()) {
585 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
589 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
593 catch(exception& e) {
594 m->errorOut(e, "MGClusterCommand", "execute");
598 //**********************************************************************************************************************
599 void MGClusterCommand::printData(ListVector* mergedList){
601 mergedList->print(listFile);
602 SAbundVector sabund = mergedList->getSAbundVector();
604 if (countfile == "") {
605 mergedList->getRAbundVector().print(rabundFile);
606 sabund.print(sabundFile);
611 catch(exception& e) {
612 m->errorOut(e, "MGClusterCommand", "printData");
616 //**********************************************************************************************************************
617 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
618 //that are used to cluster by distance. this is done so that the overlapping data does not have more influenece than the distance data.
619 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
621 //create new listvector so you don't overwrite the clustering
622 ListVector* newList = new ListVector(oldList);
628 if (hclusterWanted) {
629 m->openInputFile(overlapFile, inOverlap);
630 if (inOverlap.eof()) { done = true; }
631 }else { if (overlapMatrix.size() == 0) { done = true; } }
634 if (m->control_pressed) {
635 if (hclusterWanted) { inOverlap.close(); }
641 if (!hclusterWanted) {
642 if (count < overlapMatrix.size()) { //do we have another node in the matrix
643 overlapNode = overlapMatrix[count];
647 if (!inOverlap.eof()) {
648 string firstName, secondName;
649 float overlapDistance;
650 inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
652 //commented out because we check this in readblast already
653 //map<string,int>::iterator itA = nameMap->find(firstName);
654 //map<string,int>::iterator itB = nameMap->find(secondName);
655 //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
656 //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
658 //overlapNode.seq1 = itA->second;
659 //overlapNode.seq2 = itB->second;
660 overlapNode.seq1 = nameMap->get(firstName);
661 overlapNode.seq2 = nameMap->get(secondName);
662 overlapNode.dist = overlapDistance;
663 }else { inOverlap.close(); break; }
666 if (overlapNode.dist < dist) {
667 //get names of seqs that overlap
668 string name1 = nameMap->get(overlapNode.seq1);
669 string name2 = nameMap->get(overlapNode.seq2);
671 //use binInfo to find out if they are already in the same bin
672 //map<string, int>::iterator itBin1 = binInfo.find(name1);
673 //map<string, int>::iterator itBin2 = binInfo.find(name2);
675 //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
676 //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
678 //int binKeep = itBin1->second;
679 //int binRemove = itBin2->second;
681 int binKeep = binInfo[name1];
682 int binRemove = binInfo[name2];
684 //if not merge bins and update binInfo
685 if(binKeep != binRemove) {
687 //save names in old bin
688 string names = newList->get(binRemove);
690 //merge bins into name1s bin
691 newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
692 newList->set(binRemove, "");
695 while (names.find_first_of(',') != -1) {
697 string name = names.substr(0,names.find_first_of(','));
698 //save name and bin number
699 binInfo[name] = binKeep;
700 names = names.substr(names.find_first_of(',')+1, names.length());
704 binInfo[names] = binKeep;
707 }else { done = true; }
714 catch(exception& e) {
715 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
719 //**********************************************************************************************************************
720 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
723 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
724 m->mothurRemove(unsortedDist); //delete unsorted file
725 distFile = sortedDistFile;
728 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
729 m->mothurRemove(unsortedOverlap); //delete unsorted file
730 overlapFile = sortedOverlapFile;
732 catch(exception& e) {
733 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
738 //**********************************************************************************************************************
740 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
742 //vector<string> names = ct.getNamesOfSeqs();
744 //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
747 for(int i = 0; i < list->getNumBins(); i++) {
748 vector<string> binNames;
749 string bin = list->get(i);
750 m->splitAtComma(bin, binNames);
752 for (int j = 0; j < binNames.size(); j++) {
753 total += ct->getNumSeqs(binNames[j]);
755 rabund->push_back(total);
760 catch(exception& e) {
761 m->errorOut(e, "MGClusterCommand", "createRabund");
767 //**********************************************************************************************************************