]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
working on current change
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MGClusterCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MGClusterCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
44                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
45                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46                 helpString += "The precision parameter's default value is 100. \n";
47                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
48                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
50                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
51                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
52                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
53                 helpString += "The mgcluster command should be in the following format: \n";
54                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MGClusterCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 MGClusterCommand::MGClusterCommand(){   
65         try {
66                 abort = true; calledHelp = true; 
67                 setParameters();
68                 vector<string> tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["rabund"] = tempOutNames;
71                 outputTypes["sabund"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 MGClusterCommand::MGClusterCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string, string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                 
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["list"] = tempOutNames;
104                         outputTypes["rabund"] = tempOutNames;
105                         outputTypes["sabund"] = tempOutNames;
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("blast");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
118                                 }
119                                 
120                                 it = parameters.find("name");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
126                                 }
127                         }
128
129                         
130                         //check for required parameters
131                         blastfile = validParameter.validFile(parameters, "blast", true);
132                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
133                         else if (blastfile == "not found") { blastfile = ""; }
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
137                                 outputDir = ""; 
138                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
139                         }
140                         
141                         namefile = validParameter.validFile(parameters, "name", true);
142                         if (namefile == "not open") { abort = true; }   
143                         else if (namefile == "not found") { namefile = ""; }
144                         else { m->setNameFile(namefile); }
145                         
146                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
147                         
148                         //check for optional parameter and set defaults
149                         string temp;
150                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
151                         precisionLength = temp.length();
152                         convert(temp, precision); 
153                         
154                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
155                         convert(temp, cutoff); 
156                         cutoff += (5 / (precision * 10.0));
157                         
158                         method = validParameter.validFile(parameters, "method", false);
159                         if (method == "not found") { method = "average"; }
160                         
161                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
162                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
163
164                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
165                         convert(temp, length); 
166                         
167                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
168                         convert(temp, penalty); 
169                         
170                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
171                         minWanted = m->isTrue(temp); 
172                         
173                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
174                         merge = m->isTrue(temp); 
175                         
176                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
177                         hclusterWanted = m->isTrue(temp); 
178                         
179                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
180                         hard = m->isTrue(temp);
181                 }
182
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
186                 exit(1);
187         }
188 }
189 //**********************************************************************************************************************
190 int MGClusterCommand::execute(){
191         try {
192                 
193                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
194                 
195                 //read names file
196                 if (namefile != "") {
197                         nameMap = new NameAssignment(namefile);
198                         nameMap->readMap();
199                 }else{ nameMap= new NameAssignment(); }
200                 
201                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
202                 string tag = "";
203                 time_t start;
204                 float previousDist = 0.00000;
205                 float rndPreviousDist = 0.00000;
206                 
207                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
208                 //must remember to delete those objects here since readBlast does not
209                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
210                 read->read(nameMap);
211                 
212                 list = new ListVector(nameMap->getListVector());
213                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
214                 
215                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
216                 
217                 start = time(NULL);
218                 oldList = *list;
219                 map<string, int> Seq2Bin;
220                 map<string, int> oldSeq2Bin;
221                 
222                 if (method == "furthest")               { tag = "fn";  }
223                 else if (method == "nearest")   { tag = "nn";  }
224                 else                                                    { tag = "an";  }
225                 
226                 //open output files
227                 m->openOutputFile(fileroot+ tag + ".list",  listFile);
228                 m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
229                 m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
230                 
231                 if (m->control_pressed) { 
232                         delete nameMap; delete read; delete list; delete rabund; 
233                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
234                         outputTypes.clear();
235                         return 0; 
236                 }
237                 
238                 if (!hclusterWanted) {
239                         //get distmatrix and overlap
240                         SparseMatrix* distMatrix = read->getDistMatrix();
241                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
242                         delete read;
243                 
244                         //create cluster
245                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
246                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
247                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
248                         cluster->setMapWanted(true);
249                         Seq2Bin = cluster->getSeqtoBin();
250                         oldSeq2Bin = Seq2Bin;
251                         
252                         if (m->control_pressed) { 
253                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
254                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
255                                 outputTypes.clear();
256                                 return 0; 
257                         }
258                 
259                         //cluster using cluster classes
260                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
261                                 
262                                 cluster->update(cutoff);
263                                 
264                                 if (m->control_pressed) { 
265                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
266                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
267                                         outputTypes.clear();
268                                         return 0; 
269                                 }
270                                 
271                                 float dist = distMatrix->getSmallDist();
272                                 float rndDist;
273                                 if (hard) {
274                                         rndDist = m->ceilDist(dist, precision); 
275                                 }else{
276                                         rndDist = m->roundDist(dist, precision); 
277                                 }
278                                 
279                                 if(previousDist <= 0.0000 && dist != previousDist){
280                                         oldList.setLabel("unique");
281                                         printData(&oldList);
282                                 }
283                                 else if(rndDist != rndPreviousDist){
284                                         if (merge) {
285                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
286                                                 
287                                                 if (m->control_pressed) { 
288                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
289                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
290                                                         outputTypes.clear();
291                                                         return 0; 
292                                                 }
293                                                 
294                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
295                                                 printData(temp);
296                                                 delete temp;
297                                         }else{
298                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
299                                                 printData(&oldList);
300                                         }
301                                 }
302         
303                                 previousDist = dist;
304                                 rndPreviousDist = rndDist;
305                                 oldList = *list;
306                                 Seq2Bin = cluster->getSeqtoBin();
307                                 oldSeq2Bin = Seq2Bin;
308                         }
309                         
310                         if(previousDist <= 0.0000){
311                                 oldList.setLabel("unique");
312                                 printData(&oldList);
313                         }
314                         else if(rndPreviousDist<cutoff){
315                                 if (merge) {
316                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
317                                         
318                                         if (m->control_pressed) { 
319                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
320                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
321                                                         outputTypes.clear();
322                                                         return 0; 
323                                         }
324                                         
325                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
326                                         printData(temp);
327                                         delete temp;
328                                 }else{
329                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
330                                         printData(&oldList);
331                                 }
332                         }
333                         
334                         //free memory
335                         overlapMatrix.clear();
336                         delete distMatrix;
337                         delete cluster;
338                         
339                 }else { //use hcluster to cluster
340                         //get distmatrix and overlap
341                         overlapFile = read->getOverlapFile();
342                         distFile = read->getDistFile(); 
343                         delete read;
344                 
345                         //sort the distance and overlap files
346                         sortHclusterFiles(distFile, overlapFile);
347                         
348                         if (m->control_pressed) { 
349                                 delete nameMap;  delete list; delete rabund; 
350                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
351                                 outputTypes.clear();
352                                 return 0; 
353                         }
354                 
355                         //create cluster
356                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
357                         hcluster->setMapWanted(true);
358                         Seq2Bin = cluster->getSeqtoBin();
359                         oldSeq2Bin = Seq2Bin;
360                         
361                         vector<seqDist> seqs; seqs.resize(1); // to start loop
362                         //ifstream inHcluster;
363                         //m->openInputFile(distFile, inHcluster);
364                         
365                         if (m->control_pressed) { 
366                                 delete nameMap;  delete list; delete rabund; delete hcluster;
367                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
368                                 outputTypes.clear();
369                                 return 0; 
370                         }
371
372                         while (seqs.size() != 0){
373                 
374                                 seqs = hcluster->getSeqs();
375                                 
376                                 if (m->control_pressed) { 
377                                         delete nameMap;  delete list; delete rabund; delete hcluster;
378                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
379                                         remove(distFile.c_str());
380                                         remove(overlapFile.c_str());
381                                         outputTypes.clear();
382                                         return 0; 
383                                 }
384                                 
385                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
386                                         
387                                         if (seqs[i].seq1 != seqs[i].seq2) {
388                 
389                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
390                                                 
391                                                 if (m->control_pressed) { 
392                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
393                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
394                                                         remove(distFile.c_str());
395                                                         remove(overlapFile.c_str());
396                                                         outputTypes.clear();
397                                                         return 0; 
398                                                 }
399         
400                                                 float rndDist;
401                                                 if (hard) {
402                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
403                                                 }else{
404                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
405                                                 }
406                                                                                                 
407                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
408                                                         oldList.setLabel("unique");
409                                                         printData(&oldList);
410                                                 }
411                                                 else if((rndDist != rndPreviousDist)){
412                                                         if (merge) {
413                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
414                                                                 
415                                                                 if (m->control_pressed) { 
416                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
417                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
418                                                                         remove(distFile.c_str());
419                                                                         remove(overlapFile.c_str());
420                                                                         outputTypes.clear();
421                                                                         return 0; 
422                                                                 }
423
424                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
425                                                                 printData(temp);
426                                                                 delete temp;
427                                                         }else{
428                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
429                                                                 printData(&oldList);
430                                                         }
431                                                 }
432                                                 
433                                                 previousDist = seqs[i].dist;
434                                                 rndPreviousDist = rndDist;
435                                                 oldList = *list;
436                                                 Seq2Bin = cluster->getSeqtoBin();
437                                                 oldSeq2Bin = Seq2Bin;
438                                         }
439                                 }
440                         }
441                         //inHcluster.close();
442                         
443                         if(previousDist <= 0.0000){
444                                 oldList.setLabel("unique");
445                                 printData(&oldList);
446                         }
447                         else if(rndPreviousDist<cutoff){
448                                 if (merge) {
449                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
450                                         
451                                         if (m->control_pressed) { 
452                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
453                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
454                                                         remove(distFile.c_str());
455                                                         remove(overlapFile.c_str());
456                                                         outputTypes.clear();
457                                                         return 0; 
458                                         }
459                                         
460                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
461                                         printData(temp);
462                                         delete temp;
463                                 }else{
464                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
465                                         printData(&oldList);
466                                 }
467                         }
468                         
469                         delete hcluster;
470                         remove(distFile.c_str());
471                         remove(overlapFile.c_str());
472                 }
473                 
474                 delete list; 
475                 delete rabund;
476                 listFile.close();
477                 sabundFile.close();
478                 rabundFile.close();
479         
480                 if (m->control_pressed) { 
481                         delete nameMap; 
482                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
483                         outputTypes.clear();
484                         return 0; 
485                 }
486                 
487                 m->mothurOutEndLine();
488                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
489                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
490                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
491                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
492                 m->mothurOutEndLine();
493                 
494                 //set list file as new current listfile
495                 string current = "";
496                 itTypes = outputTypes.find("list");
497                 if (itTypes != outputTypes.end()) {
498                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
499                 }
500                 
501                 //set rabund file as new current rabundfile
502                 itTypes = outputTypes.find("rabund");
503                 if (itTypes != outputTypes.end()) {
504                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
505                 }
506                 
507                 //set sabund file as new current sabundfile
508                 itTypes = outputTypes.find("sabund");
509                 if (itTypes != outputTypes.end()) {
510                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
511                 }
512                 
513                 
514                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
515                         
516                 return 0;
517         }
518         catch(exception& e) {
519                 m->errorOut(e, "MGClusterCommand", "execute");
520                 exit(1);
521         }
522 }
523 //**********************************************************************************************************************
524 void MGClusterCommand::printData(ListVector* mergedList){
525         try {
526                 mergedList->print(listFile);
527                 mergedList->getRAbundVector().print(rabundFile);
528                 
529                 SAbundVector sabund = mergedList->getSAbundVector();
530
531                 sabund.print(cout);
532                 sabund.print(sabundFile);
533         }
534         catch(exception& e) {
535                 m->errorOut(e, "MGClusterCommand", "printData");
536                 exit(1);
537         }
538 }
539 //**********************************************************************************************************************
540 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
541 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
542 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
543         try {
544                 //create new listvector so you don't overwrite the clustering
545                 ListVector* newList = new ListVector(oldList);
546
547                 bool done = false;
548                 ifstream inOverlap;
549                 int count = 0;
550                 
551                 if (hclusterWanted) {  
552                         m->openInputFile(overlapFile, inOverlap);  
553                         if (inOverlap.eof()) {  done = true;  }
554                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
555                 
556                 while (!done) {
557                         if (m->control_pressed) { 
558                                 if (hclusterWanted) {   inOverlap.close();  }           
559                                 return newList;
560                         }
561                         
562                         //get next overlap
563                         seqDist overlapNode;
564                         if (!hclusterWanted) {  
565                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
566                                         overlapNode = overlapMatrix[count];
567                                         count++;
568                                 }else { break; }
569                         }else { 
570                                 if (!inOverlap.eof()) {
571                                         string firstName, secondName;
572                                         float overlapDistance;
573                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
574                                         
575                                         //commented out because we check this in readblast already
576                                         //map<string,int>::iterator itA = nameMap->find(firstName);
577                                         //map<string,int>::iterator itB = nameMap->find(secondName);
578                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
579                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
580                                         
581                                         //overlapNode.seq1 = itA->second;
582                                         //overlapNode.seq2 = itB->second;
583                                         overlapNode.seq1 = nameMap->get(firstName);
584                                         overlapNode.seq2 = nameMap->get(secondName);
585                                         overlapNode.dist = overlapDistance;
586                                 }else { inOverlap.close(); break; }
587                         } 
588                 
589                         if (overlapNode.dist < dist) {
590                                 //get names of seqs that overlap
591                                 string name1 = nameMap->get(overlapNode.seq1);
592                                 string name2 = nameMap->get(overlapNode.seq2);
593                         
594                                 //use binInfo to find out if they are already in the same bin
595                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
596                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
597                                 
598                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
599                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
600
601                                 //int binKeep = itBin1->second;
602                                 //int binRemove = itBin2->second;
603                                 
604                                 int binKeep = binInfo[name1];
605                                 int binRemove = binInfo[name2];
606                         
607                                 //if not merge bins and update binInfo
608                                 if(binKeep != binRemove) {
609                 
610                                         //save names in old bin
611                                         string names = newList->get(binRemove);
612                 
613                                         //merge bins into name1s bin
614                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
615                                         newList->set(binRemove, "");    
616                                         
617                                         //update binInfo
618                                         while (names.find_first_of(',') != -1) { 
619                                                 //get name from bin
620                                                 string name = names.substr(0,names.find_first_of(','));
621                                                 //save name and bin number
622                                                 binInfo[name] = binKeep;
623                                                 names = names.substr(names.find_first_of(',')+1, names.length());
624                                         }
625                                         
626                                         //get last name
627                                         binInfo[names] = binKeep;
628                                 }
629                                 
630                         }else { done = true; }
631                 }
632                 
633                 //return listvector
634                 return newList;
635                                 
636         }
637         catch(exception& e) {
638                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
639                 exit(1);
640         }
641 }
642 //**********************************************************************************************************************
643 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
644         try {
645                 //sort distFile
646                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
647                 remove(unsortedDist.c_str());  //delete unsorted file
648                 distFile = sortedDistFile;
649                 
650                 //sort overlap file
651                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
652                 remove(unsortedOverlap.c_str());  //delete unsorted file
653                 overlapFile = sortedOverlapFile;
654         }
655         catch(exception& e) {
656                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
657                 exit(1);
658         }
659 }
660
661 //**********************************************************************************************************************
662
663
664
665
666