]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
working on mgcluster count file change
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
18         //CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
19                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
20                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
21                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
22                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
23                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
24                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
25                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
26                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
27                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "MGClusterCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string MGClusterCommand::getHelpString(){       
42         try {
43                 string helpString = "";
44                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
45                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
46                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
47                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
48                 helpString += "The precision parameter's default value is 100. \n";
49                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
50                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
51                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
52                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
53                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
54                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
55                 helpString += "The mgcluster command should be in the following format: \n";
56                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "MGClusterCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
67         try {
68         string outputFileName = "";
69                 map<string, vector<string> >::iterator it;
70         
71         //is this a type this command creates
72         it = outputTypes.find(type);
73         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74         else {
75             if (type == "list") {  outputFileName =  "list"; }
76             else if (type == "rabund") {  outputFileName =  "rabund"; }
77             else if (type == "sabund") {  outputFileName =  "sabund"; }
78             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
79         }
80         return outputFileName;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 MGClusterCommand::MGClusterCommand(){   
89         try {
90                 abort = true; calledHelp = true; 
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["list"] = tempOutNames;
94                 outputTypes["rabund"] = tempOutNames;
95                 outputTypes["sabund"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 MGClusterCommand::MGClusterCommand(string option) {
104         try {
105                 abort = false; calledHelp = false;   
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string, string> parameters = parser.getParameters();
116                         
117                         ValidParameters validParameter;
118                         map<string,string>::iterator it;
119                 
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["list"] = tempOutNames;
128                         outputTypes["rabund"] = tempOutNames;
129                         outputTypes["sabund"] = tempOutNames;
130                         
131                         //if the user changes the input directory command factory will send this info to us in the output parameter 
132                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
133                         if (inputDir == "not found"){   inputDir = "";          }
134                         else {
135                                 string path;
136                                 it = parameters.find("blast");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
142                                 }
143                                 
144                                 it = parameters.find("name");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
150                                 }
151                         }
152
153                         
154                         //check for required parameters
155                         blastfile = validParameter.validFile(parameters, "blast", true);
156                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
157                         else if (blastfile == "not found") { blastfile = ""; }
158                         
159                         //if the user changes the output directory command factory will send this info to us in the output parameter 
160                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
161                                 outputDir = ""; 
162                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
163                         }
164                         
165                         namefile = validParameter.validFile(parameters, "name", true);
166                         if (namefile == "not open") { abort = true; }   
167                         else if (namefile == "not found") { namefile = ""; }
168                         else { m->setNameFile(namefile); }
169             
170             countfile = validParameter.validFile(parameters, "count", true);
171                         if (countfile == "not open") { abort = true; }  
172                         else if (countfile == "not found") { countfile = ""; }
173             else { m->setCountTableFile(countfile); }
174             
175             if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
176                         
177                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
178                         
179                         //check for optional parameter and set defaults
180                         string temp;
181             temp = validParameter.validFile(parameters, "large", false);                        if (temp == "not found") { temp = "false"; }            
182                         large = m->isTrue(temp); 
183             
184                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
185                         precisionLength = temp.length();
186                         m->mothurConvert(temp, precision); 
187                         
188                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
189                         m->mothurConvert(temp, cutoff); 
190                         cutoff += (5 / (precision * 10.0));
191                         
192                         method = validParameter.validFile(parameters, "method", false);
193                         if (method == "not found") { method = "average"; }
194                         
195                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
196                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
197
198                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
199                         m->mothurConvert(temp, length); 
200                         
201                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
202                         m->mothurConvert(temp, penalty); 
203                         
204                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
205                         minWanted = m->isTrue(temp); 
206                         
207                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
208                         merge = m->isTrue(temp); 
209                         
210                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
211                         hclusterWanted = m->isTrue(temp); 
212                         
213                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
214                         hard = m->isTrue(temp);            
215                 }
216
217         }
218         catch(exception& e) {
219                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
220                 exit(1);
221         }
222 }
223 //**********************************************************************************************************************
224 int MGClusterCommand::execute(){
225         try {
226                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
227                 
228                 //read names file
229                 if (namefile != "") {
230                         nameMap = new NameAssignment(namefile);
231                         nameMap->readMap();
232                 }else{ nameMap= new NameAssignment(); }
233                 
234                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
235                 string tag = "";
236                 time_t start;
237                 float previousDist = 0.00000;
238                 float rndPreviousDist = 0.00000; 
239         
240                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
241                 //must remember to delete those objects here since readBlast does not
242                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
243                 read->read(nameMap);
244         
245         list = new ListVector(nameMap->getListVector());
246         RAbundVector* rabund = NULL;
247         
248         if(countfile != "") {
249             //map<string, int> nameMapCounts = m->readNames(namefile);
250             ct = new CountTable();
251             ct->readTable(countfile);
252             rabund = new RAbundVector();
253             createRabund(ct, list, rabund);
254         }else {
255             rabund = new RAbundVector(list->getRAbundVector());
256         }
257         
258                 
259                 //list = new ListVector(nameMap->getListVector());
260                 //rabund = new RAbundVector(list->getRAbundVector());
261                 
262                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
263                 
264                 start = time(NULL);
265                 oldList = *list;
266                 map<string, int> Seq2Bin;
267                 map<string, int> oldSeq2Bin;
268                 
269                 if (method == "furthest")               { tag = "fn";  }
270                 else if (method == "nearest")   { tag = "nn";  }
271                 else                                                    { tag = "an";  }
272                 
273         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
274         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
275         string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
276         
277                 m->openOutputFile(sabundFileName,       sabundFile);
278                 m->openOutputFile(rabundFileName,       rabundFile);
279                 m->openOutputFile(listFileName, listFile);
280                 
281                 if (m->control_pressed) { 
282                         delete nameMap; delete read; delete list; delete rabund; 
283                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
284                         outputTypes.clear();
285                         return 0; 
286                 }
287                 
288                 double saveCutoff = cutoff;
289                 
290                 if (!hclusterWanted) {
291                         //get distmatrix and overlap
292                         SparseMatrix* distMatrix = read->getDistMatrix();
293                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
294                         delete read;
295                 
296                         //create cluster
297                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
298                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
299                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
300                         cluster->setMapWanted(true);
301                         Seq2Bin = cluster->getSeqtoBin();
302                         oldSeq2Bin = Seq2Bin;
303                         
304                         if (m->control_pressed) { 
305                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
306                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
307                                 outputTypes.clear();
308                                 return 0; 
309                         }
310                 
311                         //cluster using cluster classes
312                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
313                                 
314                                 cluster->update(cutoff);
315                                 
316                                 if (m->control_pressed) { 
317                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
318                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
319                                         outputTypes.clear();
320                                         return 0; 
321                                 }
322                                 
323                                 float dist = distMatrix->getSmallDist();
324                                 float rndDist;
325                                 if (hard) {
326                                         rndDist = m->ceilDist(dist, precision); 
327                                 }else{
328                                         rndDist = m->roundDist(dist, precision); 
329                                 }
330                                 
331                                 if(previousDist <= 0.0000 && dist != previousDist){
332                                         oldList.setLabel("unique");
333                                         printData(&oldList);
334                                 }
335                                 else if(rndDist != rndPreviousDist){
336                                         if (merge) {
337                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
338                                                 
339                                                 if (m->control_pressed) { 
340                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
341                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
342                                                         outputTypes.clear();
343                                                         return 0; 
344                                                 }
345                                                 
346                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
347                                                 printData(temp);
348                                                 delete temp;
349                                         }else{
350                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
351                                                 printData(&oldList);
352                                         }
353                                 }
354         
355                                 previousDist = dist;
356                                 rndPreviousDist = rndDist;
357                                 oldList = *list;
358                                 Seq2Bin = cluster->getSeqtoBin();
359                                 oldSeq2Bin = Seq2Bin;
360                         }
361                         
362                         if(previousDist <= 0.0000){
363                                 oldList.setLabel("unique");
364                                 printData(&oldList);
365                         }
366                         else if(rndPreviousDist<cutoff){
367                                 if (merge) {
368                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
369                                         
370                                         if (m->control_pressed) { 
371                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
372                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
373                                                         outputTypes.clear();
374                                                         return 0; 
375                                         }
376                                         
377                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
378                                         printData(temp);
379                                         delete temp;
380                                 }else{
381                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
382                                         printData(&oldList);
383                                 }
384                         }
385                         
386                         //free memory
387                         overlapMatrix.clear();
388                         delete distMatrix;
389                         delete cluster;
390                         
391                 }else { //use hcluster to cluster
392                         //get distmatrix and overlap
393                         overlapFile = read->getOverlapFile();
394                         distFile = read->getDistFile(); 
395                         delete read;
396                 
397                         //sort the distance and overlap files
398                         sortHclusterFiles(distFile, overlapFile);
399                         
400                         if (m->control_pressed) { 
401                                 delete nameMap;  delete list; delete rabund; 
402                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
403                                 outputTypes.clear();
404                                 return 0; 
405                         }
406                 
407                         //create cluster
408                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
409                         hcluster->setMapWanted(true);
410                         Seq2Bin = cluster->getSeqtoBin();
411                         oldSeq2Bin = Seq2Bin;
412                         
413                         vector<seqDist> seqs; seqs.resize(1); // to start loop
414                         //ifstream inHcluster;
415                         //m->openInputFile(distFile, inHcluster);
416                         
417                         if (m->control_pressed) { 
418                                 delete nameMap;  delete list; delete rabund; delete hcluster;
419                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
420                                 outputTypes.clear();
421                                 return 0; 
422                         }
423
424                         while (seqs.size() != 0){
425                 
426                                 seqs = hcluster->getSeqs();
427                                 
428                                 //to account for cutoff change in average neighbor
429                                 if (seqs.size() != 0) {
430                                         if (seqs[0].dist > cutoff) { break; }
431                                 }
432                                 
433                                 if (m->control_pressed) { 
434                                         delete nameMap;  delete list; delete rabund; delete hcluster;
435                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
436                                         m->mothurRemove(distFile);
437                                         m->mothurRemove(overlapFile);
438                                         outputTypes.clear();
439                                         return 0; 
440                                 }
441                                 
442                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
443                                         
444                                         if (seqs[i].seq1 != seqs[i].seq2) {
445                 
446                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
447                                                 
448                                                 if (m->control_pressed) { 
449                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
450                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
451                                                         m->mothurRemove(distFile);
452                                                         m->mothurRemove(overlapFile);
453                                                         outputTypes.clear();
454                                                         return 0; 
455                                                 }
456         
457                                                 float rndDist;
458                                                 if (hard) {
459                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
460                                                 }else{
461                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
462                                                 }
463                                                                                                 
464                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
465                                                         oldList.setLabel("unique");
466                                                         printData(&oldList);
467                                                 }
468                                                 else if((rndDist != rndPreviousDist)){
469                                                         if (merge) {
470                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
471                                                                 
472                                                                 if (m->control_pressed) { 
473                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
474                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
475                                                                         m->mothurRemove(distFile);
476                                                                         m->mothurRemove(overlapFile);
477                                                                         outputTypes.clear();
478                                                                         return 0; 
479                                                                 }
480
481                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
482                                                                 printData(temp);
483                                                                 delete temp;
484                                                         }else{
485                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
486                                                                 printData(&oldList);
487                                                         }
488                                                 }
489                                                 
490                                                 previousDist = seqs[i].dist;
491                                                 rndPreviousDist = rndDist;
492                                                 oldList = *list;
493                                                 Seq2Bin = cluster->getSeqtoBin();
494                                                 oldSeq2Bin = Seq2Bin;
495                                         }
496                                 }
497                         }
498                         //inHcluster.close();
499                         
500                         if(previousDist <= 0.0000){
501                                 oldList.setLabel("unique");
502                                 printData(&oldList);
503                         }
504                         else if(rndPreviousDist<cutoff){
505                                 if (merge) {
506                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
507                                         
508                                         if (m->control_pressed) { 
509                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
510                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
511                                                         m->mothurRemove(distFile);
512                                                         m->mothurRemove(overlapFile);
513                                                         outputTypes.clear();
514                                                         return 0; 
515                                         }
516                                         
517                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
518                                         printData(temp);
519                                         delete temp;
520                                 }else{
521                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
522                                         printData(&oldList);
523                                 }
524                         }
525                         
526                         delete hcluster;
527                         m->mothurRemove(distFile);
528                         m->mothurRemove(overlapFile);
529                 }
530                 
531                 delete list;
532                 if (!large) {delete rabund;}
533                 listFile.close();
534                 sabundFile.close();
535                 rabundFile.close();
536         
537                 if (m->control_pressed) { 
538                         delete nameMap; 
539                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
540                         outputTypes.clear();
541                         return 0; 
542                 }
543                 
544                 m->mothurOutEndLine();
545                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
546                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
547                 m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
548                 m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
549                 m->mothurOutEndLine();
550                 
551                 if (saveCutoff != cutoff) { 
552                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
553                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
554                         
555                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
556                 }
557                 
558                 //set list file as new current listfile
559                 string current = "";
560                 itTypes = outputTypes.find("list");
561                 if (itTypes != outputTypes.end()) {
562                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
563                 }
564                 
565                 //set rabund file as new current rabundfile
566                 itTypes = outputTypes.find("rabund");
567                 if (itTypes != outputTypes.end()) {
568                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
569                 }
570                 
571                 //set sabund file as new current sabundfile
572                 itTypes = outputTypes.find("sabund");
573                 if (itTypes != outputTypes.end()) {
574                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
575                 }
576                 
577                 
578                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
579                         
580                 return 0;
581         }
582         catch(exception& e) {
583                 m->errorOut(e, "MGClusterCommand", "execute");
584                 exit(1);
585         }
586 }
587 //**********************************************************************************************************************
588 void MGClusterCommand::printData(ListVector* mergedList){
589         try {
590                 mergedList->print(listFile);
591                 mergedList->getRAbundVector().print(rabundFile);
592                 
593                 SAbundVector sabund = mergedList->getSAbundVector();
594
595                 sabund.print(cout);
596                 sabund.print(sabundFile);
597         }
598         catch(exception& e) {
599                 m->errorOut(e, "MGClusterCommand", "printData");
600                 exit(1);
601         }
602 }
603 //**********************************************************************************************************************
604 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
605 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
606 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
607         try {
608                 //create new listvector so you don't overwrite the clustering
609                 ListVector* newList = new ListVector(oldList);
610
611                 bool done = false;
612                 ifstream inOverlap;
613                 int count = 0;
614                 
615                 if (hclusterWanted) {  
616                         m->openInputFile(overlapFile, inOverlap);  
617                         if (inOverlap.eof()) {  done = true;  }
618                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
619                 
620                 while (!done) {
621                         if (m->control_pressed) { 
622                                 if (hclusterWanted) {   inOverlap.close();  }           
623                                 return newList;
624                         }
625                         
626                         //get next overlap
627                         seqDist overlapNode;
628                         if (!hclusterWanted) {  
629                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
630                                         overlapNode = overlapMatrix[count];
631                                         count++;
632                                 }else { break; }
633                         }else { 
634                                 if (!inOverlap.eof()) {
635                                         string firstName, secondName;
636                                         float overlapDistance;
637                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
638                                         
639                                         //commented out because we check this in readblast already
640                                         //map<string,int>::iterator itA = nameMap->find(firstName);
641                                         //map<string,int>::iterator itB = nameMap->find(secondName);
642                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
643                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
644                                         
645                                         //overlapNode.seq1 = itA->second;
646                                         //overlapNode.seq2 = itB->second;
647                                         overlapNode.seq1 = nameMap->get(firstName);
648                                         overlapNode.seq2 = nameMap->get(secondName);
649                                         overlapNode.dist = overlapDistance;
650                                 }else { inOverlap.close(); break; }
651                         } 
652                 
653                         if (overlapNode.dist < dist) {
654                                 //get names of seqs that overlap
655                                 string name1 = nameMap->get(overlapNode.seq1);
656                                 string name2 = nameMap->get(overlapNode.seq2);
657                         
658                                 //use binInfo to find out if they are already in the same bin
659                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
660                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
661                                 
662                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
663                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
664
665                                 //int binKeep = itBin1->second;
666                                 //int binRemove = itBin2->second;
667                                 
668                                 int binKeep = binInfo[name1];
669                                 int binRemove = binInfo[name2];
670                         
671                                 //if not merge bins and update binInfo
672                                 if(binKeep != binRemove) {
673                 
674                                         //save names in old bin
675                                         string names = newList->get(binRemove);
676                 
677                                         //merge bins into name1s bin
678                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
679                                         newList->set(binRemove, "");    
680                                         
681                                         //update binInfo
682                                         while (names.find_first_of(',') != -1) { 
683                                                 //get name from bin
684                                                 string name = names.substr(0,names.find_first_of(','));
685                                                 //save name and bin number
686                                                 binInfo[name] = binKeep;
687                                                 names = names.substr(names.find_first_of(',')+1, names.length());
688                                         }
689                                         
690                                         //get last name
691                                         binInfo[names] = binKeep;
692                                 }
693                                 
694                         }else { done = true; }
695                 }
696                 
697                 //return listvector
698                 return newList;
699                                 
700         }
701         catch(exception& e) {
702                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
703                 exit(1);
704         }
705 }
706 //**********************************************************************************************************************
707 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
708         try {
709                 //sort distFile
710                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
711                 m->mothurRemove(unsortedDist);  //delete unsorted file
712                 distFile = sortedDistFile;
713                 
714                 //sort overlap file
715                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
716                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
717                 overlapFile = sortedOverlapFile;
718         }
719         catch(exception& e) {
720                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
721                 exit(1);
722         }
723 }
724
725 //**********************************************************************************************************************
726
727 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
728     try {
729         //vector<string> names = ct.getNamesOfSeqs();
730
731         //for ( int i; i < ct.getNumGroups(); i++ ) {    rav.push_back( ct.getNumSeqs(names[i]) );    }
732         //return rav;
733         
734         for(int i = 0; i < list->getNumBins(); i++) { 
735            vector<string> binNames;
736            string bin = list->get(i);
737            m->splitAtComma(bin, binNames);
738            int total = 0;
739            for (int j = 0; j < binNames.size(); j++) { 
740                total += ct->getNumSeqs(binNames[j]);
741            }
742            rabund->push_back(total);   
743        }
744         
745         
746     }
747     catch(exception& e) {
748                 m->errorOut(e, "MGClusterCommand", "createRabund");
749                 exit(1);
750         }
751     
752 }
753
754 //**********************************************************************************************************************
755
756