]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added createRabund function to mgcluster
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MGClusterCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MGClusterCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
44                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
45                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46                 helpString += "The precision parameter's default value is 100. \n";
47                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
48                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
50                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
51                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
52                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
53                 helpString += "The mgcluster command should be in the following format: \n";
54                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MGClusterCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
65         try {
66         string outputFileName = "";
67                 map<string, vector<string> >::iterator it;
68         
69         //is this a type this command creates
70         it = outputTypes.find(type);
71         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
72         else {
73             if (type == "list") {  outputFileName =  "list"; }
74             else if (type == "rabund") {  outputFileName =  "rabund"; }
75             else if (type == "sabund") {  outputFileName =  "sabund"; }
76             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
77         }
78         return outputFileName;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 MGClusterCommand::MGClusterCommand(){   
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["list"] = tempOutNames;
92                 outputTypes["rabund"] = tempOutNames;
93                 outputTypes["sabund"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 MGClusterCommand::MGClusterCommand(string option) {
102         try {
103                 abort = false; calledHelp = false;   
104                 
105                 //allow user to run help
106                 if(option == "help") { help(); abort = true; calledHelp = true; }
107                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108                 
109                 else {
110                         vector<string> myArray = setParameters();
111                         
112                         OptionParser parser(option);
113                         map<string, string> parameters = parser.getParameters();
114                         
115                         ValidParameters validParameter;
116                         map<string,string>::iterator it;
117                 
118                         //check to make sure all parameters are valid for command
119                         for (it = parameters.begin(); it != parameters.end(); it++) { 
120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
121                         }
122                         
123                         //initialize outputTypes
124                         vector<string> tempOutNames;
125                         outputTypes["list"] = tempOutNames;
126                         outputTypes["rabund"] = tempOutNames;
127                         outputTypes["sabund"] = tempOutNames;
128                         
129                         //if the user changes the input directory command factory will send this info to us in the output parameter 
130                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
131                         if (inputDir == "not found"){   inputDir = "";          }
132                         else {
133                                 string path;
134                                 it = parameters.find("blast");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
140                                 }
141                                 
142                                 it = parameters.find("name");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
148                                 }
149                         }
150
151                         
152                         //check for required parameters
153                         blastfile = validParameter.validFile(parameters, "blast", true);
154                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
155                         else if (blastfile == "not found") { blastfile = ""; }
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
159                                 outputDir = ""; 
160                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
161                         }
162                         
163                         namefile = validParameter.validFile(parameters, "name", true);
164                         if (namefile == "not open") { abort = true; }   
165                         else if (namefile == "not found") { namefile = ""; }
166                         else { m->setNameFile(namefile); }
167                         
168                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
169                         
170                         //check for optional parameter and set defaults
171                         string temp;
172                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
173                         precisionLength = temp.length();
174                         m->mothurConvert(temp, precision); 
175                         
176                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
177                         m->mothurConvert(temp, cutoff); 
178                         cutoff += (5 / (precision * 10.0));
179                         
180                         method = validParameter.validFile(parameters, "method", false);
181                         if (method == "not found") { method = "average"; }
182                         
183                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
184                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
185
186                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
187                         m->mothurConvert(temp, length); 
188                         
189                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
190                         m->mothurConvert(temp, penalty); 
191                         
192                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
193                         minWanted = m->isTrue(temp); 
194                         
195                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
196                         merge = m->isTrue(temp); 
197                         
198                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
199                         hclusterWanted = m->isTrue(temp); 
200                         
201                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
202                         hard = m->isTrue(temp);
203                 }
204
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
208                 exit(1);
209         }
210 }
211 //**********************************************************************************************************************
212 int MGClusterCommand::execute(){
213         try {
214                 
215                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
216                 
217                 //read names file
218                 if (namefile != "") {
219                         nameMap = new NameAssignment(namefile);
220                         nameMap->readMap();
221                 }else{ nameMap= new NameAssignment(); }
222                 
223                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
224                 string tag = "";
225                 time_t start;
226                 float previousDist = 0.00000;
227                 float rndPreviousDist = 0.00000; 
228                 
229                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
230                 //must remember to delete those objects here since readBlast does not
231                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
232                 read->read(nameMap);
233         
234         list = new ListVector(nameMap->getListVector());
235         RAbundVector* rabund = NULL;
236         if(large) {
237             map<string, int> nameMapCounts = m->readNames(namefile);
238             RAbundVector* rabund = createRabund(list, nameMapCounts);
239         }else {
240             RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
241         }
242         
243                 
244                 //list = new ListVector(nameMap->getListVector());
245                 //RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
246                 
247                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
248                 
249                 start = time(NULL);
250                 oldList = *list;
251                 map<string, int> Seq2Bin;
252                 map<string, int> oldSeq2Bin;
253                 
254                 if (method == "furthest")               { tag = "fn";  }
255                 else if (method == "nearest")   { tag = "nn";  }
256                 else                                                    { tag = "an";  }
257                 
258         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
259         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
260         string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
261         
262                 m->openOutputFile(sabundFileName,       sabundFile);
263                 m->openOutputFile(rabundFileName,       rabundFile);
264                 m->openOutputFile(listFileName, listFile);
265                 
266                 if (m->control_pressed) { 
267                         delete nameMap; delete read; delete list; delete rabund; 
268                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
269                         outputTypes.clear();
270                         return 0; 
271                 }
272                 
273                 double saveCutoff = cutoff;
274                 
275                 if (!hclusterWanted) {
276                         //get distmatrix and overlap
277                         SparseMatrix* distMatrix = read->getDistMatrix();
278                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
279                         delete read;
280                 
281                         //create cluster
282                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
283                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
284                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
285                         cluster->setMapWanted(true);
286                         Seq2Bin = cluster->getSeqtoBin();
287                         oldSeq2Bin = Seq2Bin;
288                         
289                         if (m->control_pressed) { 
290                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
291                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
292                                 outputTypes.clear();
293                                 return 0; 
294                         }
295                 
296                         //cluster using cluster classes
297                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
298                                 
299                                 cluster->update(cutoff);
300                                 
301                                 if (m->control_pressed) { 
302                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
303                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
304                                         outputTypes.clear();
305                                         return 0; 
306                                 }
307                                 
308                                 float dist = distMatrix->getSmallDist();
309                                 float rndDist;
310                                 if (hard) {
311                                         rndDist = m->ceilDist(dist, precision); 
312                                 }else{
313                                         rndDist = m->roundDist(dist, precision); 
314                                 }
315                                 
316                                 if(previousDist <= 0.0000 && dist != previousDist){
317                                         oldList.setLabel("unique");
318                                         printData(&oldList);
319                                 }
320                                 else if(rndDist != rndPreviousDist){
321                                         if (merge) {
322                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
323                                                 
324                                                 if (m->control_pressed) { 
325                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
326                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
327                                                         outputTypes.clear();
328                                                         return 0; 
329                                                 }
330                                                 
331                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
332                                                 printData(temp);
333                                                 delete temp;
334                                         }else{
335                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
336                                                 printData(&oldList);
337                                         }
338                                 }
339         
340                                 previousDist = dist;
341                                 rndPreviousDist = rndDist;
342                                 oldList = *list;
343                                 Seq2Bin = cluster->getSeqtoBin();
344                                 oldSeq2Bin = Seq2Bin;
345                         }
346                         
347                         if(previousDist <= 0.0000){
348                                 oldList.setLabel("unique");
349                                 printData(&oldList);
350                         }
351                         else if(rndPreviousDist<cutoff){
352                                 if (merge) {
353                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
354                                         
355                                         if (m->control_pressed) { 
356                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
357                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
358                                                         outputTypes.clear();
359                                                         return 0; 
360                                         }
361                                         
362                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
363                                         printData(temp);
364                                         delete temp;
365                                 }else{
366                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
367                                         printData(&oldList);
368                                 }
369                         }
370                         
371                         //free memory
372                         overlapMatrix.clear();
373                         delete distMatrix;
374                         delete cluster;
375                         
376                 }else { //use hcluster to cluster
377                         //get distmatrix and overlap
378                         overlapFile = read->getOverlapFile();
379                         distFile = read->getDistFile(); 
380                         delete read;
381                 
382                         //sort the distance and overlap files
383                         sortHclusterFiles(distFile, overlapFile);
384                         
385                         if (m->control_pressed) { 
386                                 delete nameMap;  delete list; delete rabund; 
387                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
388                                 outputTypes.clear();
389                                 return 0; 
390                         }
391                 
392                         //create cluster
393                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
394                         hcluster->setMapWanted(true);
395                         Seq2Bin = cluster->getSeqtoBin();
396                         oldSeq2Bin = Seq2Bin;
397                         
398                         vector<seqDist> seqs; seqs.resize(1); // to start loop
399                         //ifstream inHcluster;
400                         //m->openInputFile(distFile, inHcluster);
401                         
402                         if (m->control_pressed) { 
403                                 delete nameMap;  delete list; delete rabund; delete hcluster;
404                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
405                                 outputTypes.clear();
406                                 return 0; 
407                         }
408
409                         while (seqs.size() != 0){
410                 
411                                 seqs = hcluster->getSeqs();
412                                 
413                                 //to account for cutoff change in average neighbor
414                                 if (seqs.size() != 0) {
415                                         if (seqs[0].dist > cutoff) { break; }
416                                 }
417                                 
418                                 if (m->control_pressed) { 
419                                         delete nameMap;  delete list; delete rabund; delete hcluster;
420                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
421                                         m->mothurRemove(distFile);
422                                         m->mothurRemove(overlapFile);
423                                         outputTypes.clear();
424                                         return 0; 
425                                 }
426                                 
427                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
428                                         
429                                         if (seqs[i].seq1 != seqs[i].seq2) {
430                 
431                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
432                                                 
433                                                 if (m->control_pressed) { 
434                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
435                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
436                                                         m->mothurRemove(distFile);
437                                                         m->mothurRemove(overlapFile);
438                                                         outputTypes.clear();
439                                                         return 0; 
440                                                 }
441         
442                                                 float rndDist;
443                                                 if (hard) {
444                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
445                                                 }else{
446                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
447                                                 }
448                                                                                                 
449                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
450                                                         oldList.setLabel("unique");
451                                                         printData(&oldList);
452                                                 }
453                                                 else if((rndDist != rndPreviousDist)){
454                                                         if (merge) {
455                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
456                                                                 
457                                                                 if (m->control_pressed) { 
458                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
459                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
460                                                                         m->mothurRemove(distFile);
461                                                                         m->mothurRemove(overlapFile);
462                                                                         outputTypes.clear();
463                                                                         return 0; 
464                                                                 }
465
466                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
467                                                                 printData(temp);
468                                                                 delete temp;
469                                                         }else{
470                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
471                                                                 printData(&oldList);
472                                                         }
473                                                 }
474                                                 
475                                                 previousDist = seqs[i].dist;
476                                                 rndPreviousDist = rndDist;
477                                                 oldList = *list;
478                                                 Seq2Bin = cluster->getSeqtoBin();
479                                                 oldSeq2Bin = Seq2Bin;
480                                         }
481                                 }
482                         }
483                         //inHcluster.close();
484                         
485                         if(previousDist <= 0.0000){
486                                 oldList.setLabel("unique");
487                                 printData(&oldList);
488                         }
489                         else if(rndPreviousDist<cutoff){
490                                 if (merge) {
491                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
492                                         
493                                         if (m->control_pressed) { 
494                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
495                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
496                                                         m->mothurRemove(distFile);
497                                                         m->mothurRemove(overlapFile);
498                                                         outputTypes.clear();
499                                                         return 0; 
500                                         }
501                                         
502                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
503                                         printData(temp);
504                                         delete temp;
505                                 }else{
506                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
507                                         printData(&oldList);
508                                 }
509                         }
510                         
511                         delete hcluster;
512                         m->mothurRemove(distFile);
513                         m->mothurRemove(overlapFile);
514                 }
515                 
516                 delete list; 
517                 delete rabund;
518                 listFile.close();
519                 sabundFile.close();
520                 rabundFile.close();
521         
522                 if (m->control_pressed) { 
523                         delete nameMap; 
524                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
525                         outputTypes.clear();
526                         return 0; 
527                 }
528                 
529                 m->mothurOutEndLine();
530                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
532                 m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
533                 m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
534                 m->mothurOutEndLine();
535                 
536                 if (saveCutoff != cutoff) { 
537                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
538                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
539                         
540                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
541                 }
542                 
543                 //set list file as new current listfile
544                 string current = "";
545                 itTypes = outputTypes.find("list");
546                 if (itTypes != outputTypes.end()) {
547                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
548                 }
549                 
550                 //set rabund file as new current rabundfile
551                 itTypes = outputTypes.find("rabund");
552                 if (itTypes != outputTypes.end()) {
553                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
554                 }
555                 
556                 //set sabund file as new current sabundfile
557                 itTypes = outputTypes.find("sabund");
558                 if (itTypes != outputTypes.end()) {
559                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
560                 }
561                 
562                 
563                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
564                         
565                 return 0;
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "MGClusterCommand", "execute");
569                 exit(1);
570         }
571 }
572 //**********************************************************************************************************************
573 void MGClusterCommand::printData(ListVector* mergedList){
574         try {
575                 mergedList->print(listFile);
576                 mergedList->getRAbundVector().print(rabundFile);
577                 
578                 SAbundVector sabund = mergedList->getSAbundVector();
579
580                 sabund.print(cout);
581                 sabund.print(sabundFile);
582         }
583         catch(exception& e) {
584                 m->errorOut(e, "MGClusterCommand", "printData");
585                 exit(1);
586         }
587 }
588 //**********************************************************************************************************************
589 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
590 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
591 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
592         try {
593                 //create new listvector so you don't overwrite the clustering
594                 ListVector* newList = new ListVector(oldList);
595
596                 bool done = false;
597                 ifstream inOverlap;
598                 int count = 0;
599                 
600                 if (hclusterWanted) {  
601                         m->openInputFile(overlapFile, inOverlap);  
602                         if (inOverlap.eof()) {  done = true;  }
603                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
604                 
605                 while (!done) {
606                         if (m->control_pressed) { 
607                                 if (hclusterWanted) {   inOverlap.close();  }           
608                                 return newList;
609                         }
610                         
611                         //get next overlap
612                         seqDist overlapNode;
613                         if (!hclusterWanted) {  
614                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
615                                         overlapNode = overlapMatrix[count];
616                                         count++;
617                                 }else { break; }
618                         }else { 
619                                 if (!inOverlap.eof()) {
620                                         string firstName, secondName;
621                                         float overlapDistance;
622                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
623                                         
624                                         //commented out because we check this in readblast already
625                                         //map<string,int>::iterator itA = nameMap->find(firstName);
626                                         //map<string,int>::iterator itB = nameMap->find(secondName);
627                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
628                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
629                                         
630                                         //overlapNode.seq1 = itA->second;
631                                         //overlapNode.seq2 = itB->second;
632                                         overlapNode.seq1 = nameMap->get(firstName);
633                                         overlapNode.seq2 = nameMap->get(secondName);
634                                         overlapNode.dist = overlapDistance;
635                                 }else { inOverlap.close(); break; }
636                         } 
637                 
638                         if (overlapNode.dist < dist) {
639                                 //get names of seqs that overlap
640                                 string name1 = nameMap->get(overlapNode.seq1);
641                                 string name2 = nameMap->get(overlapNode.seq2);
642                         
643                                 //use binInfo to find out if they are already in the same bin
644                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
645                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
646                                 
647                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
648                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
649
650                                 //int binKeep = itBin1->second;
651                                 //int binRemove = itBin2->second;
652                                 
653                                 int binKeep = binInfo[name1];
654                                 int binRemove = binInfo[name2];
655                         
656                                 //if not merge bins and update binInfo
657                                 if(binKeep != binRemove) {
658                 
659                                         //save names in old bin
660                                         string names = newList->get(binRemove);
661                 
662                                         //merge bins into name1s bin
663                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
664                                         newList->set(binRemove, "");    
665                                         
666                                         //update binInfo
667                                         while (names.find_first_of(',') != -1) { 
668                                                 //get name from bin
669                                                 string name = names.substr(0,names.find_first_of(','));
670                                                 //save name and bin number
671                                                 binInfo[name] = binKeep;
672                                                 names = names.substr(names.find_first_of(',')+1, names.length());
673                                         }
674                                         
675                                         //get last name
676                                         binInfo[names] = binKeep;
677                                 }
678                                 
679                         }else { done = true; }
680                 }
681                 
682                 //return listvector
683                 return newList;
684                                 
685         }
686         catch(exception& e) {
687                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
688                 exit(1);
689         }
690 }
691 //**********************************************************************************************************************
692 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
693         try {
694                 //sort distFile
695                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
696                 m->mothurRemove(unsortedDist);  //delete unsorted file
697                 distFile = sortedDistFile;
698                 
699                 //sort overlap file
700                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
701                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
702                 overlapFile = sortedOverlapFile;
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
706                 exit(1);
707         }
708 }
709
710 //**********************************************************************************************************************
711
712 RAbundVector MGClusterCommand::createRabund(ListVector list, map<string, int> nameMapCounts){
713     for(int i = 0; i < list->getNumBins(); i++) { 
714     
715     }
716 }
717
718 //**********************************************************************************************************************
719
720