]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
fixed subsample name file name issue. added count parameter to cluster command....
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){       
41         try {
42                 string helpString = "";
43                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47                 helpString += "The precision parameter's default value is 100. \n";
48                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
49                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
51                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
52                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
53                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54                 helpString += "The mgcluster command should be in the following format: \n";
55                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MGClusterCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
66         try {
67         string outputFileName = "";
68                 map<string, vector<string> >::iterator it;
69         
70         //is this a type this command creates
71         it = outputTypes.find(type);
72         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73         else {
74             if (type == "list") {  outputFileName =  "list"; }
75             else if (type == "rabund") {  outputFileName =  "rabund"; }
76             else if (type == "sabund") {  outputFileName =  "sabund"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){   
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["list"] = tempOutNames;
93                 outputTypes["rabund"] = tempOutNames;
94                 outputTypes["sabund"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
103         try {
104                 abort = false; calledHelp = false;   
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string, string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter;
117                         map<string,string>::iterator it;
118                 
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         //initialize outputTypes
125                         vector<string> tempOutNames;
126                         outputTypes["list"] = tempOutNames;
127                         outputTypes["rabund"] = tempOutNames;
128                         outputTypes["sabund"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("blast");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
141                                 }
142                                 
143                                 it = parameters.find("name");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
149                                 }
150                         }
151
152                         
153                         //check for required parameters
154                         blastfile = validParameter.validFile(parameters, "blast", true);
155                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
156                         else if (blastfile == "not found") { blastfile = ""; }
157                         
158                         //if the user changes the output directory command factory will send this info to us in the output parameter 
159                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
160                                 outputDir = ""; 
161                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
162                         }
163                         
164                         namefile = validParameter.validFile(parameters, "name", true);
165                         if (namefile == "not open") { abort = true; }   
166                         else if (namefile == "not found") { namefile = ""; }
167                         else { m->setNameFile(namefile); }
168             
169             countfile = validParameter.validFile(parameters, "count", true);
170                         if (countfile == "not open") { abort = true; }  
171                         else if (countfile == "not found") { countfile = ""; }
172             else { m->setCountTableFile(countfile); }
173             
174             if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
175                         
176                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
177                         
178                         //check for optional parameter and set defaults
179                         string temp;
180             temp = validParameter.validFile(parameters, "precision", false);            if (temp == "not found") { temp = "100"; }
181                         precisionLength = temp.length();
182                         m->mothurConvert(temp, precision); 
183                         
184                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
185                         m->mothurConvert(temp, cutoff); 
186                         cutoff += (5 / (precision * 10.0));
187                         
188                         method = validParameter.validFile(parameters, "method", false);
189                         if (method == "not found") { method = "average"; }
190                         
191                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
192                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
193
194                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
195                         m->mothurConvert(temp, length); 
196                         
197                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
198                         m->mothurConvert(temp, penalty); 
199                         
200                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
201                         minWanted = m->isTrue(temp); 
202                         
203                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
204                         merge = m->isTrue(temp); 
205                         
206                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
207                         hclusterWanted = m->isTrue(temp); 
208                         
209                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
210                         hard = m->isTrue(temp);            
211                 }
212
213         }
214         catch(exception& e) {
215                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
216                 exit(1);
217         }
218 }
219 //**********************************************************************************************************************
220 int MGClusterCommand::execute(){
221         try {
222                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
223                 
224                 //read names file
225                 if (namefile != "") {
226                         nameMap = new NameAssignment(namefile);
227                         nameMap->readMap();
228                 }else{ nameMap= new NameAssignment(); }
229                 
230                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
231                 string tag = "";
232                 time_t start;
233                 float previousDist = 0.00000;
234                 float rndPreviousDist = 0.00000; 
235         
236                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
237                 //must remember to delete those objects here since readBlast does not
238                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
239                 read->read(nameMap);
240         
241         list = new ListVector(nameMap->getListVector());
242         RAbundVector* rabund = NULL;
243         
244         if(countfile != "") {
245             //map<string, int> nameMapCounts = m->readNames(namefile);
246             ct = new CountTable();
247             ct->readTable(countfile);
248             rabund = new RAbundVector();
249             createRabund(ct, list, rabund);
250         }else {
251             rabund = new RAbundVector(list->getRAbundVector());
252         }
253         
254                 
255                 //list = new ListVector(nameMap->getListVector());
256                 //rabund = new RAbundVector(list->getRAbundVector());
257                 
258                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
259                 
260                 start = time(NULL);
261                 oldList = *list;
262                 map<string, int> Seq2Bin;
263                 map<string, int> oldSeq2Bin;
264                 
265                 if (method == "furthest")               { tag = "fn";  }
266                 else if (method == "nearest")   { tag = "nn";  }
267                 else                                                    { tag = "an";  }
268                 
269         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
270         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
271         string listFileName = fileroot+ tag + ".";
272         if (countfile != "") { listFileName += "unique_"; }
273         listFileName += getOutputFileNameTag("list");
274         
275         if (countfile == "") {
276             m->openOutputFile(sabundFileName,   sabundFile);
277             m->openOutputFile(rabundFileName,   rabundFile);
278         }
279                 m->openOutputFile(listFileName, listFile);
280                 
281                 if (m->control_pressed) { 
282                         delete nameMap; delete read; delete list; delete rabund; 
283                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
284                         outputTypes.clear();
285                         return 0; 
286                 }
287                 
288                 double saveCutoff = cutoff;
289                 
290                 if (!hclusterWanted) {
291                         //get distmatrix and overlap
292                         SparseDistanceMatrix* distMatrix = read->getDistMatrix();
293                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
294                         delete read;
295                 
296                         //create cluster
297                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
298                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
299                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
300                         cluster->setMapWanted(true);
301                         Seq2Bin = cluster->getSeqtoBin();
302                         oldSeq2Bin = Seq2Bin;
303                         
304                         if (m->control_pressed) { 
305                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
306                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
307                                 outputTypes.clear();
308                                 return 0; 
309                         }
310                 
311                         //cluster using cluster classes
312                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
313                                 
314                                 cluster->update(cutoff);
315                                 
316                                 if (m->control_pressed) { 
317                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
318                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
319                                         outputTypes.clear();
320                                         return 0; 
321                                 }
322                                 
323                                 float dist = distMatrix->getSmallDist();
324                                 float rndDist;
325                                 if (hard) {
326                                         rndDist = m->ceilDist(dist, precision); 
327                                 }else{
328                                         rndDist = m->roundDist(dist, precision); 
329                                 }
330                                 
331                                 if(previousDist <= 0.0000 && dist != previousDist){
332                                         oldList.setLabel("unique");
333                                         printData(&oldList);
334                                 }
335                                 else if(rndDist != rndPreviousDist){
336                                         if (merge) {
337                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
338                                                 
339                                                 if (m->control_pressed) { 
340                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
341                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
342                                                         outputTypes.clear();
343                                                         return 0; 
344                                                 }
345                                                 
346                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
347                                                 printData(temp);
348                                                 delete temp;
349                                         }else{
350                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
351                                                 printData(&oldList);
352                                         }
353                                 }
354         
355                                 previousDist = dist;
356                                 rndPreviousDist = rndDist;
357                                 oldList = *list;
358                                 Seq2Bin = cluster->getSeqtoBin();
359                                 oldSeq2Bin = Seq2Bin;
360                         }
361                         
362                         if(previousDist <= 0.0000){
363                                 oldList.setLabel("unique");
364                                 printData(&oldList);
365                         }
366                         else if(rndPreviousDist<cutoff){
367                                 if (merge) {
368                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
369                                         
370                                         if (m->control_pressed) { 
371                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
372                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
373                                                         outputTypes.clear();
374                                                         return 0; 
375                                         }
376                                         
377                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
378                                         printData(temp);
379                                         delete temp;
380                                 }else{
381                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
382                                         printData(&oldList);
383                                 }
384                         }
385                         
386                         //free memory
387                         overlapMatrix.clear();
388                         delete distMatrix;
389                         delete cluster;
390                         
391                 }else { //use hcluster to cluster
392                         //get distmatrix and overlap
393                         overlapFile = read->getOverlapFile();
394                         distFile = read->getDistFile(); 
395                         delete read;
396                 
397                         //sort the distance and overlap files
398                         sortHclusterFiles(distFile, overlapFile);
399                         
400                         if (m->control_pressed) { 
401                                 delete nameMap;  delete list; delete rabund; 
402                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
403                                 outputTypes.clear();
404                                 return 0; 
405                         }
406                 
407                         //create cluster
408                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
409                         hcluster->setMapWanted(true);
410                         Seq2Bin = cluster->getSeqtoBin();
411                         oldSeq2Bin = Seq2Bin;
412                         
413                         vector<seqDist> seqs; seqs.resize(1); // to start loop
414                         //ifstream inHcluster;
415                         //m->openInputFile(distFile, inHcluster);
416                         
417                         if (m->control_pressed) { 
418                                 delete nameMap;  delete list; delete rabund; delete hcluster;
419                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
420                                 outputTypes.clear();
421                                 return 0; 
422                         }
423
424                         while (seqs.size() != 0){
425                 
426                                 seqs = hcluster->getSeqs();
427                                 
428                                 //to account for cutoff change in average neighbor
429                                 if (seqs.size() != 0) {
430                                         if (seqs[0].dist > cutoff) { break; }
431                                 }
432                                 
433                                 if (m->control_pressed) { 
434                                         delete nameMap;  delete list; delete rabund; delete hcluster;
435                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
436                                         m->mothurRemove(distFile);
437                                         m->mothurRemove(overlapFile);
438                                         outputTypes.clear();
439                                         return 0; 
440                                 }
441                                 
442                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
443                                         
444                                         if (seqs[i].seq1 != seqs[i].seq2) {
445                 
446                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
447                                                 
448                                                 if (m->control_pressed) { 
449                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
450                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
451                                                         m->mothurRemove(distFile);
452                                                         m->mothurRemove(overlapFile);
453                                                         outputTypes.clear();
454                                                         return 0; 
455                                                 }
456         
457                                                 float rndDist;
458                                                 if (hard) {
459                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
460                                                 }else{
461                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
462                                                 }
463                                                                                                 
464                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
465                                                         oldList.setLabel("unique");
466                                                         printData(&oldList);
467                                                 }
468                                                 else if((rndDist != rndPreviousDist)){
469                                                         if (merge) {
470                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
471                                                                 
472                                                                 if (m->control_pressed) { 
473                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
474                                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
475                                                                         m->mothurRemove(distFile);
476                                                                         m->mothurRemove(overlapFile);
477                                                                         outputTypes.clear();
478                                                                         return 0; 
479                                                                 }
480
481                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
482                                                                 printData(temp);
483                                                                 delete temp;
484                                                         }else{
485                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
486                                                                 printData(&oldList);
487                                                         }
488                                                 }
489                                                 
490                                                 previousDist = seqs[i].dist;
491                                                 rndPreviousDist = rndDist;
492                                                 oldList = *list;
493                                                 Seq2Bin = cluster->getSeqtoBin();
494                                                 oldSeq2Bin = Seq2Bin;
495                                         }
496                                 }
497                         }
498                         //inHcluster.close();
499                         
500                         if(previousDist <= 0.0000){
501                                 oldList.setLabel("unique");
502                                 printData(&oldList);
503                         }
504                         else if(rndPreviousDist<cutoff){
505                                 if (merge) {
506                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
507                                         
508                                         if (m->control_pressed) { 
509                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
510                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
511                                                         m->mothurRemove(distFile);
512                                                         m->mothurRemove(overlapFile);
513                                                         outputTypes.clear();
514                                                         return 0; 
515                                         }
516                                         
517                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
518                                         printData(temp);
519                                         delete temp;
520                                 }else{
521                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
522                                         printData(&oldList);
523                                 }
524                         }
525                         
526                         delete hcluster;
527                         m->mothurRemove(distFile);
528                         m->mothurRemove(overlapFile);
529                 }
530                 
531                 delete list;
532                 delete rabund;
533                 listFile.close();
534         if (countfile == "") {
535             sabundFile.close();
536             rabundFile.close();
537         }
538                 if (m->control_pressed) { 
539                         delete nameMap; 
540                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
541                         outputTypes.clear();
542                         return 0; 
543                 }
544                 
545                 m->mothurOutEndLine();
546                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
547                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
548                 if (countfile == "") {
549             m->mothurOut(rabundFileName); m->mothurOutEndLine();        outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
550             m->mothurOut(sabundFileName); m->mothurOutEndLine();        outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
551         }
552                 m->mothurOutEndLine();
553                 
554                 if (saveCutoff != cutoff) { 
555                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
556                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
557                         
558                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
559                 }
560                 
561                 //set list file as new current listfile
562                 string current = "";
563                 itTypes = outputTypes.find("list");
564                 if (itTypes != outputTypes.end()) {
565                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
566                 }
567                 
568                 //set rabund file as new current rabundfile
569                 itTypes = outputTypes.find("rabund");
570                 if (itTypes != outputTypes.end()) {
571                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
572                 }
573                 
574                 //set sabund file as new current sabundfile
575                 itTypes = outputTypes.find("sabund");
576                 if (itTypes != outputTypes.end()) {
577                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
578                 }
579                 
580                 
581                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
582                         
583                 return 0;
584         }
585         catch(exception& e) {
586                 m->errorOut(e, "MGClusterCommand", "execute");
587                 exit(1);
588         }
589 }
590 //**********************************************************************************************************************
591 void MGClusterCommand::printData(ListVector* mergedList){
592         try {
593                 mergedList->print(listFile);
594         SAbundVector sabund = mergedList->getSAbundVector();
595         
596         if (countfile == "") {
597             mergedList->getRAbundVector().print(rabundFile);
598             sabund.print(sabundFile);
599         }
600
601                 sabund.print(cout);
602         }
603         catch(exception& e) {
604                 m->errorOut(e, "MGClusterCommand", "printData");
605                 exit(1);
606         }
607 }
608 //**********************************************************************************************************************
609 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
610 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
611 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
612         try {
613                 //create new listvector so you don't overwrite the clustering
614                 ListVector* newList = new ListVector(oldList);
615
616                 bool done = false;
617                 ifstream inOverlap;
618                 int count = 0;
619                 
620                 if (hclusterWanted) {  
621                         m->openInputFile(overlapFile, inOverlap);  
622                         if (inOverlap.eof()) {  done = true;  }
623                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
624                 
625                 while (!done) {
626                         if (m->control_pressed) { 
627                                 if (hclusterWanted) {   inOverlap.close();  }           
628                                 return newList;
629                         }
630                         
631                         //get next overlap
632                         seqDist overlapNode;
633                         if (!hclusterWanted) {  
634                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
635                                         overlapNode = overlapMatrix[count];
636                                         count++;
637                                 }else { break; }
638                         }else { 
639                                 if (!inOverlap.eof()) {
640                                         string firstName, secondName;
641                                         float overlapDistance;
642                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
643                                         
644                                         //commented out because we check this in readblast already
645                                         //map<string,int>::iterator itA = nameMap->find(firstName);
646                                         //map<string,int>::iterator itB = nameMap->find(secondName);
647                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
648                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
649                                         
650                                         //overlapNode.seq1 = itA->second;
651                                         //overlapNode.seq2 = itB->second;
652                                         overlapNode.seq1 = nameMap->get(firstName);
653                                         overlapNode.seq2 = nameMap->get(secondName);
654                                         overlapNode.dist = overlapDistance;
655                                 }else { inOverlap.close(); break; }
656                         } 
657                 
658                         if (overlapNode.dist < dist) {
659                                 //get names of seqs that overlap
660                                 string name1 = nameMap->get(overlapNode.seq1);
661                                 string name2 = nameMap->get(overlapNode.seq2);
662                         
663                                 //use binInfo to find out if they are already in the same bin
664                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
665                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
666                                 
667                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
668                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
669
670                                 //int binKeep = itBin1->second;
671                                 //int binRemove = itBin2->second;
672                                 
673                                 int binKeep = binInfo[name1];
674                                 int binRemove = binInfo[name2];
675                         
676                                 //if not merge bins and update binInfo
677                                 if(binKeep != binRemove) {
678                 
679                                         //save names in old bin
680                                         string names = newList->get(binRemove);
681                 
682                                         //merge bins into name1s bin
683                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
684                                         newList->set(binRemove, "");    
685                                         
686                                         //update binInfo
687                                         while (names.find_first_of(',') != -1) { 
688                                                 //get name from bin
689                                                 string name = names.substr(0,names.find_first_of(','));
690                                                 //save name and bin number
691                                                 binInfo[name] = binKeep;
692                                                 names = names.substr(names.find_first_of(',')+1, names.length());
693                                         }
694                                         
695                                         //get last name
696                                         binInfo[names] = binKeep;
697                                 }
698                                 
699                         }else { done = true; }
700                 }
701                 
702                 //return listvector
703                 return newList;
704                                 
705         }
706         catch(exception& e) {
707                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
708                 exit(1);
709         }
710 }
711 //**********************************************************************************************************************
712 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
713         try {
714                 //sort distFile
715                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
716                 m->mothurRemove(unsortedDist);  //delete unsorted file
717                 distFile = sortedDistFile;
718                 
719                 //sort overlap file
720                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
721                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
722                 overlapFile = sortedOverlapFile;
723         }
724         catch(exception& e) {
725                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
726                 exit(1);
727         }
728 }
729
730 //**********************************************************************************************************************
731
732 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
733     try {
734         //vector<string> names = ct.getNamesOfSeqs();
735
736         //for ( int i; i < ct.getNumGroups(); i++ ) {    rav.push_back( ct.getNumSeqs(names[i]) );    }
737         //return rav;
738         
739         for(int i = 0; i < list->getNumBins(); i++) { 
740            vector<string> binNames;
741            string bin = list->get(i);
742            m->splitAtComma(bin, binNames);
743            int total = 0;
744            for (int j = 0; j < binNames.size(); j++) { 
745                total += ct->getNumSeqs(binNames[j]);
746            }
747            rabund->push_back(total);   
748        }
749         
750         
751     }
752     catch(exception& e) {
753                 m->errorOut(e, "MGClusterCommand", "createRabund");
754                 exit(1);
755         }
756     
757 }
758
759 //**********************************************************************************************************************
760
761