]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
fixed bug in phylo.diversity rooting. added filename patterns and create filename...
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
17                 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
26                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){       
41         try {
42                 string helpString = "";
43                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47                 helpString += "The precision parameter's default value is 100. \n";
48                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
49                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
51                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
52                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
53                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54                 helpString += "The mgcluster command should be in the following format: \n";
55                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MGClusterCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } 
70         else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; } 
71         else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
72         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
73         
74         return pattern;
75     }
76     catch(exception& e) {
77         m->errorOut(e, "MGClusterCommand", "getOutputPattern");
78         exit(1);
79     }
80 }
81 //*******************************************************************************************************************
82 MGClusterCommand::MGClusterCommand(){   
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["list"] = tempOutNames;
88                 outputTypes["rabund"] = tempOutNames;
89                 outputTypes["sabund"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97 MGClusterCommand::MGClusterCommand(string option) {
98         try {
99                 abort = false; calledHelp = false;   
100                 
101                 //allow user to run help
102                 if(option == "help") { help(); abort = true; calledHelp = true; }
103                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104                 
105                 else {
106                         vector<string> myArray = setParameters();
107                         
108                         OptionParser parser(option);
109                         map<string, string> parameters = parser.getParameters();
110                         
111                         ValidParameters validParameter;
112                         map<string,string>::iterator it;
113                 
114                         //check to make sure all parameters are valid for command
115                         for (it = parameters.begin(); it != parameters.end(); it++) { 
116                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
117                         }
118                         
119                         //initialize outputTypes
120                         vector<string> tempOutNames;
121                         outputTypes["list"] = tempOutNames;
122                         outputTypes["rabund"] = tempOutNames;
123                         outputTypes["sabund"] = tempOutNames;
124                         
125                         //if the user changes the input directory command factory will send this info to us in the output parameter 
126                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
127                         if (inputDir == "not found"){   inputDir = "";          }
128                         else {
129                                 string path;
130                                 it = parameters.find("blast");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
136                                 }
137                                 
138                                 it = parameters.find("name");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
144                                 }
145                 
146                 it = parameters.find("count");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
152                                 }
153                         }
154
155                         
156                         //check for required parameters
157                         blastfile = validParameter.validFile(parameters, "blast", true);
158                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
159                         else if (blastfile == "not found") { blastfile = ""; }
160                         
161                         //if the user changes the output directory command factory will send this info to us in the output parameter 
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
163                                 outputDir = ""; 
164                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
165                         }
166                         
167                         namefile = validParameter.validFile(parameters, "name", true);
168                         if (namefile == "not open") { abort = true; }   
169                         else if (namefile == "not found") { namefile = ""; }
170                         else { m->setNameFile(namefile); }
171             
172             countfile = validParameter.validFile(parameters, "count", true);
173                         if (countfile == "not open") { abort = true; }  
174                         else if (countfile == "not found") { countfile = ""; }
175             else { m->setCountTableFile(countfile); }
176             
177             if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
178                         
179                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
180                         
181                         //check for optional parameter and set defaults
182                         string temp;
183             temp = validParameter.validFile(parameters, "precision", false);            if (temp == "not found") { temp = "100"; }
184                         precisionLength = temp.length();
185                         m->mothurConvert(temp, precision); 
186                         
187                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
188                         m->mothurConvert(temp, cutoff); 
189                         cutoff += (5 / (precision * 10.0));
190                         
191                         method = validParameter.validFile(parameters, "method", false);
192                         if (method == "not found") { method = "average"; }
193                         
194                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
195                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
196
197                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
198                         m->mothurConvert(temp, length); 
199                         
200                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
201                         m->mothurConvert(temp, penalty); 
202                         
203                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
204                         minWanted = m->isTrue(temp); 
205                         
206                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
207                         merge = m->isTrue(temp); 
208                         
209                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
210                         hclusterWanted = m->isTrue(temp); 
211                         
212                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
213                         hard = m->isTrue(temp);            
214                 }
215
216         }
217         catch(exception& e) {
218                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
219                 exit(1);
220         }
221 }
222 //**********************************************************************************************************************
223 int MGClusterCommand::execute(){
224         try {
225                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
226                 
227                 //read names file
228                 if (namefile != "") {
229                         nameMap = new NameAssignment(namefile);
230                         nameMap->readMap();
231                 }else{ nameMap= new NameAssignment(); }
232                 
233                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
234                 string tag = "";
235                 time_t start;
236                 float previousDist = 0.00000;
237                 float rndPreviousDist = 0.00000; 
238         
239                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
240                 //must remember to delete those objects here since readBlast does not
241                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
242                 read->read(nameMap);
243         
244         list = new ListVector(nameMap->getListVector());
245         RAbundVector* rabund = NULL;
246         
247         if(countfile != "") {
248             //map<string, int> nameMapCounts = m->readNames(namefile);
249             ct = new CountTable();
250             ct->readTable(countfile);
251             rabund = new RAbundVector();
252             createRabund(ct, list, rabund);
253         }else {
254             rabund = new RAbundVector(list->getRAbundVector());
255         }
256         
257                 
258                 //list = new ListVector(nameMap->getListVector());
259                 //rabund = new RAbundVector(list->getRAbundVector());
260                 
261                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
262                 
263                 start = time(NULL);
264                 oldList = *list;
265                 map<string, int> Seq2Bin;
266                 map<string, int> oldSeq2Bin;
267                 
268                 if (method == "furthest")               { tag = "fn";  }
269                 else if (method == "nearest")   { tag = "nn";  }
270                 else                                                    { tag = "an";  }
271                 
272         map<string, string> variables; 
273         variables["[filename]"] = fileroot;
274         if (countfile != "") { variables["[tag2]"] = "unique_list"; }
275         variables["[clustertag]"] = tag;
276         string sabundFileName = getOutputFileName("sabund", variables);
277         string rabundFileName = getOutputFileName("rabund", variables);
278         string listFileName = getOutputFileName("list", variables);
279         
280         if (countfile == "") {
281             m->openOutputFile(sabundFileName,   sabundFile);
282             m->openOutputFile(rabundFileName,   rabundFile);
283         }
284                 m->openOutputFile(listFileName, listFile);
285                 
286                 if (m->control_pressed) { 
287                         delete nameMap; delete read; delete list; delete rabund; 
288                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
289                         outputTypes.clear();
290                         return 0; 
291                 }
292                 
293                 double saveCutoff = cutoff;
294                 
295                 if (!hclusterWanted) {
296                         //get distmatrix and overlap
297                         SparseDistanceMatrix* distMatrix = read->getDistMatrix();
298                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
299                         delete read;
300                 
301                         //create cluster
302                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
303                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
304                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
305                         cluster->setMapWanted(true);
306                         Seq2Bin = cluster->getSeqtoBin();
307                         oldSeq2Bin = Seq2Bin;
308                         
309                         if (m->control_pressed) { 
310                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
311                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
312                                 outputTypes.clear();
313                                 return 0; 
314                         }
315                 
316                         //cluster using cluster classes
317                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
318                                 
319                                 cluster->update(cutoff);
320                                 
321                                 if (m->control_pressed) { 
322                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
323                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
324                                         outputTypes.clear();
325                                         return 0; 
326                                 }
327                                 
328                                 float dist = distMatrix->getSmallDist();
329                                 float rndDist;
330                                 if (hard) {
331                                         rndDist = m->ceilDist(dist, precision); 
332                                 }else{
333                                         rndDist = m->roundDist(dist, precision); 
334                                 }
335                                 
336                                 if(previousDist <= 0.0000 && dist != previousDist){
337                                         oldList.setLabel("unique");
338                                         printData(&oldList);
339                                 }
340                                 else if(rndDist != rndPreviousDist){
341                                         if (merge) {
342                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
343                                                 
344                                                 if (m->control_pressed) { 
345                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
346                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
347                                                         outputTypes.clear();
348                                                         return 0; 
349                                                 }
350                                                 
351                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
352                                                 printData(temp);
353                                                 delete temp;
354                                         }else{
355                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
356                                                 printData(&oldList);
357                                         }
358                                 }
359         
360                                 previousDist = dist;
361                                 rndPreviousDist = rndDist;
362                                 oldList = *list;
363                                 Seq2Bin = cluster->getSeqtoBin();
364                                 oldSeq2Bin = Seq2Bin;
365                         }
366                         
367                         if(previousDist <= 0.0000){
368                                 oldList.setLabel("unique");
369                                 printData(&oldList);
370                         }
371                         else if(rndPreviousDist<cutoff){
372                                 if (merge) {
373                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
374                                         
375                                         if (m->control_pressed) { 
376                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
377                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
378                                                         outputTypes.clear();
379                                                         return 0; 
380                                         }
381                                         
382                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
383                                         printData(temp);
384                                         delete temp;
385                                 }else{
386                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
387                                         printData(&oldList);
388                                 }
389                         }
390                         
391                         //free memory
392                         overlapMatrix.clear();
393                         delete distMatrix;
394                         delete cluster;
395                         
396                 }else { //use hcluster to cluster
397                         //get distmatrix and overlap
398                         overlapFile = read->getOverlapFile();
399                         distFile = read->getDistFile(); 
400                         delete read;
401                 
402                         //sort the distance and overlap files
403                         sortHclusterFiles(distFile, overlapFile);
404                         
405                         if (m->control_pressed) { 
406                                 delete nameMap;  delete list; delete rabund; 
407                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
408                                 outputTypes.clear();
409                                 return 0; 
410                         }
411                 
412                         //create cluster
413                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
414                         hcluster->setMapWanted(true);
415                         Seq2Bin = cluster->getSeqtoBin();
416                         oldSeq2Bin = Seq2Bin;
417                         
418                         vector<seqDist> seqs; seqs.resize(1); // to start loop
419                         //ifstream inHcluster;
420                         //m->openInputFile(distFile, inHcluster);
421                         
422                         if (m->control_pressed) { 
423                                 delete nameMap;  delete list; delete rabund; delete hcluster;
424                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
425                                 outputTypes.clear();
426                                 return 0; 
427                         }
428
429                         while (seqs.size() != 0){
430                 
431                                 seqs = hcluster->getSeqs();
432                                 
433                                 //to account for cutoff change in average neighbor
434                                 if (seqs.size() != 0) {
435                                         if (seqs[0].dist > cutoff) { break; }
436                                 }
437                                 
438                                 if (m->control_pressed) { 
439                                         delete nameMap;  delete list; delete rabund; delete hcluster;
440                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
441                                         m->mothurRemove(distFile);
442                                         m->mothurRemove(overlapFile);
443                                         outputTypes.clear();
444                                         return 0; 
445                                 }
446                                 
447                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
448                                         
449                                         if (seqs[i].seq1 != seqs[i].seq2) {
450                 
451                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
452                                                 
453                                                 if (m->control_pressed) { 
454                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
455                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
456                                                         m->mothurRemove(distFile);
457                                                         m->mothurRemove(overlapFile);
458                                                         outputTypes.clear();
459                                                         return 0; 
460                                                 }
461         
462                                                 float rndDist;
463                                                 if (hard) {
464                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
465                                                 }else{
466                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
467                                                 }
468                                                                                                 
469                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
470                                                         oldList.setLabel("unique");
471                                                         printData(&oldList);
472                                                 }
473                                                 else if((rndDist != rndPreviousDist)){
474                                                         if (merge) {
475                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
476                                                                 
477                                                                 if (m->control_pressed) { 
478                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
479                                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
480                                                                         m->mothurRemove(distFile);
481                                                                         m->mothurRemove(overlapFile);
482                                                                         outputTypes.clear();
483                                                                         return 0; 
484                                                                 }
485
486                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
487                                                                 printData(temp);
488                                                                 delete temp;
489                                                         }else{
490                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
491                                                                 printData(&oldList);
492                                                         }
493                                                 }
494                                                 
495                                                 previousDist = seqs[i].dist;
496                                                 rndPreviousDist = rndDist;
497                                                 oldList = *list;
498                                                 Seq2Bin = cluster->getSeqtoBin();
499                                                 oldSeq2Bin = Seq2Bin;
500                                         }
501                                 }
502                         }
503                         //inHcluster.close();
504                         
505                         if(previousDist <= 0.0000){
506                                 oldList.setLabel("unique");
507                                 printData(&oldList);
508                         }
509                         else if(rndPreviousDist<cutoff){
510                                 if (merge) {
511                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
512                                         
513                                         if (m->control_pressed) { 
514                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
515                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
516                                                         m->mothurRemove(distFile);
517                                                         m->mothurRemove(overlapFile);
518                                                         outputTypes.clear();
519                                                         return 0; 
520                                         }
521                                         
522                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
523                                         printData(temp);
524                                         delete temp;
525                                 }else{
526                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
527                                         printData(&oldList);
528                                 }
529                         }
530                         
531                         delete hcluster;
532                         m->mothurRemove(distFile);
533                         m->mothurRemove(overlapFile);
534                 }
535                 
536                 delete list;
537                 delete rabund;
538                 listFile.close();
539         if (countfile == "") {
540             sabundFile.close();
541             rabundFile.close();
542         }
543                 if (m->control_pressed) { 
544                         delete nameMap; 
545                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
546                         outputTypes.clear();
547                         return 0; 
548                 }
549                 
550                 m->mothurOutEndLine();
551                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
552                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
553                 if (countfile == "") {
554             m->mothurOut(rabundFileName); m->mothurOutEndLine();        outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
555             m->mothurOut(sabundFileName); m->mothurOutEndLine();        outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
556         }
557                 m->mothurOutEndLine();
558                 
559                 if (saveCutoff != cutoff) { 
560                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
561                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
562                         
563                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
564                 }
565                 
566                 //set list file as new current listfile
567                 string current = "";
568                 itTypes = outputTypes.find("list");
569                 if (itTypes != outputTypes.end()) {
570                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
571                 }
572                 
573                 //set rabund file as new current rabundfile
574                 itTypes = outputTypes.find("rabund");
575                 if (itTypes != outputTypes.end()) {
576                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
577                 }
578                 
579                 //set sabund file as new current sabundfile
580                 itTypes = outputTypes.find("sabund");
581                 if (itTypes != outputTypes.end()) {
582                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
583                 }
584                 
585                 
586                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
587                         
588                 return 0;
589         }
590         catch(exception& e) {
591                 m->errorOut(e, "MGClusterCommand", "execute");
592                 exit(1);
593         }
594 }
595 //**********************************************************************************************************************
596 void MGClusterCommand::printData(ListVector* mergedList){
597         try {
598                 mergedList->print(listFile);
599         SAbundVector sabund = mergedList->getSAbundVector();
600         
601         if (countfile == "") {
602             mergedList->getRAbundVector().print(rabundFile);
603             sabund.print(sabundFile);
604         }
605
606                 sabund.print(cout);
607         }
608         catch(exception& e) {
609                 m->errorOut(e, "MGClusterCommand", "printData");
610                 exit(1);
611         }
612 }
613 //**********************************************************************************************************************
614 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
615 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
616 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
617         try {
618                 //create new listvector so you don't overwrite the clustering
619                 ListVector* newList = new ListVector(oldList);
620
621                 bool done = false;
622                 ifstream inOverlap;
623                 int count = 0;
624                 
625                 if (hclusterWanted) {  
626                         m->openInputFile(overlapFile, inOverlap);  
627                         if (inOverlap.eof()) {  done = true;  }
628                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
629                 
630                 while (!done) {
631                         if (m->control_pressed) { 
632                                 if (hclusterWanted) {   inOverlap.close();  }           
633                                 return newList;
634                         }
635                         
636                         //get next overlap
637                         seqDist overlapNode;
638                         if (!hclusterWanted) {  
639                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
640                                         overlapNode = overlapMatrix[count];
641                                         count++;
642                                 }else { break; }
643                         }else { 
644                                 if (!inOverlap.eof()) {
645                                         string firstName, secondName;
646                                         float overlapDistance;
647                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
648                                         
649                                         //commented out because we check this in readblast already
650                                         //map<string,int>::iterator itA = nameMap->find(firstName);
651                                         //map<string,int>::iterator itB = nameMap->find(secondName);
652                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
653                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
654                                         
655                                         //overlapNode.seq1 = itA->second;
656                                         //overlapNode.seq2 = itB->second;
657                                         overlapNode.seq1 = nameMap->get(firstName);
658                                         overlapNode.seq2 = nameMap->get(secondName);
659                                         overlapNode.dist = overlapDistance;
660                                 }else { inOverlap.close(); break; }
661                         } 
662                 
663                         if (overlapNode.dist < dist) {
664                                 //get names of seqs that overlap
665                                 string name1 = nameMap->get(overlapNode.seq1);
666                                 string name2 = nameMap->get(overlapNode.seq2);
667                         
668                                 //use binInfo to find out if they are already in the same bin
669                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
670                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
671                                 
672                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
673                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
674
675                                 //int binKeep = itBin1->second;
676                                 //int binRemove = itBin2->second;
677                                 
678                                 int binKeep = binInfo[name1];
679                                 int binRemove = binInfo[name2];
680                         
681                                 //if not merge bins and update binInfo
682                                 if(binKeep != binRemove) {
683                 
684                                         //save names in old bin
685                                         string names = newList->get(binRemove);
686                 
687                                         //merge bins into name1s bin
688                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
689                                         newList->set(binRemove, "");    
690                                         
691                                         //update binInfo
692                                         while (names.find_first_of(',') != -1) { 
693                                                 //get name from bin
694                                                 string name = names.substr(0,names.find_first_of(','));
695                                                 //save name and bin number
696                                                 binInfo[name] = binKeep;
697                                                 names = names.substr(names.find_first_of(',')+1, names.length());
698                                         }
699                                         
700                                         //get last name
701                                         binInfo[names] = binKeep;
702                                 }
703                                 
704                         }else { done = true; }
705                 }
706                 
707                 //return listvector
708                 return newList;
709                                 
710         }
711         catch(exception& e) {
712                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
713                 exit(1);
714         }
715 }
716 //**********************************************************************************************************************
717 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
718         try {
719                 //sort distFile
720                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
721                 m->mothurRemove(unsortedDist);  //delete unsorted file
722                 distFile = sortedDistFile;
723                 
724                 //sort overlap file
725                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
726                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
727                 overlapFile = sortedOverlapFile;
728         }
729         catch(exception& e) {
730                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
731                 exit(1);
732         }
733 }
734
735 //**********************************************************************************************************************
736
737 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
738     try {
739         //vector<string> names = ct.getNamesOfSeqs();
740
741         //for ( int i; i < ct.getNumGroups(); i++ ) {    rav.push_back( ct.getNumSeqs(names[i]) );    }
742         //return rav;
743         
744         for(int i = 0; i < list->getNumBins(); i++) { 
745            vector<string> binNames;
746            string bin = list->get(i);
747            m->splitAtComma(bin, binNames);
748            int total = 0;
749            for (int j = 0; j < binNames.size(); j++) { 
750                total += ct->getNumSeqs(binNames[j]);
751            }
752            rabund->push_back(total);   
753        }
754         
755         
756     }
757     catch(exception& e) {
758                 m->errorOut(e, "MGClusterCommand", "createRabund");
759                 exit(1);
760         }
761     
762 }
763
764 //**********************************************************************************************************************
765
766