]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
17                 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){       
41         try {
42                 string helpString = "";
43                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47                 helpString += "The precision parameter's default value is 100. \n";
48                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
49                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
51                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
52                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
53                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54                 helpString += "The mgcluster command should be in the following format: \n";
55                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MGClusterCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
66         try {
67         string outputFileName = "";
68                 map<string, vector<string> >::iterator it;
69         
70         //is this a type this command creates
71         it = outputTypes.find(type);
72         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73         else {
74             if (type == "list") {  outputFileName =  "list"; }
75             else if (type == "rabund") {  outputFileName =  "rabund"; }
76             else if (type == "sabund") {  outputFileName =  "sabund"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){   
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["list"] = tempOutNames;
93                 outputTypes["rabund"] = tempOutNames;
94                 outputTypes["sabund"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
103         try {
104                 abort = false; calledHelp = false;   
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string, string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter;
117                         map<string,string>::iterator it;
118                 
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         //initialize outputTypes
125                         vector<string> tempOutNames;
126                         outputTypes["list"] = tempOutNames;
127                         outputTypes["rabund"] = tempOutNames;
128                         outputTypes["sabund"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("blast");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
141                                 }
142                                 
143                                 it = parameters.find("name");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
149                                 }
150                 
151                 it = parameters.find("count");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
157                                 }
158                         }
159
160                         
161                         //check for required parameters
162                         blastfile = validParameter.validFile(parameters, "blast", true);
163                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
164                         else if (blastfile == "not found") { blastfile = ""; }
165                         
166                         //if the user changes the output directory command factory will send this info to us in the output parameter 
167                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
168                                 outputDir = ""; 
169                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
170                         }
171                         
172                         namefile = validParameter.validFile(parameters, "name", true);
173                         if (namefile == "not open") { abort = true; }   
174                         else if (namefile == "not found") { namefile = ""; }
175                         else { m->setNameFile(namefile); }
176             
177             countfile = validParameter.validFile(parameters, "count", true);
178                         if (countfile == "not open") { abort = true; }  
179                         else if (countfile == "not found") { countfile = ""; }
180             else { m->setCountTableFile(countfile); }
181             
182             if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
183                         
184                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
185                         
186                         //check for optional parameter and set defaults
187                         string temp;
188             temp = validParameter.validFile(parameters, "precision", false);            if (temp == "not found") { temp = "100"; }
189                         precisionLength = temp.length();
190                         m->mothurConvert(temp, precision); 
191                         
192                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
193                         m->mothurConvert(temp, cutoff); 
194                         cutoff += (5 / (precision * 10.0));
195                         
196                         method = validParameter.validFile(parameters, "method", false);
197                         if (method == "not found") { method = "average"; }
198                         
199                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
200                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
201
202                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
203                         m->mothurConvert(temp, length); 
204                         
205                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
206                         m->mothurConvert(temp, penalty); 
207                         
208                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
209                         minWanted = m->isTrue(temp); 
210                         
211                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
212                         merge = m->isTrue(temp); 
213                         
214                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
215                         hclusterWanted = m->isTrue(temp); 
216                         
217                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
218                         hard = m->isTrue(temp);            
219                 }
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
224                 exit(1);
225         }
226 }
227 //**********************************************************************************************************************
228 int MGClusterCommand::execute(){
229         try {
230                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
231                 
232                 //read names file
233                 if (namefile != "") {
234                         nameMap = new NameAssignment(namefile);
235                         nameMap->readMap();
236                 }else{ nameMap= new NameAssignment(); }
237                 
238                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
239                 string tag = "";
240                 time_t start;
241                 float previousDist = 0.00000;
242                 float rndPreviousDist = 0.00000; 
243         
244                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
245                 //must remember to delete those objects here since readBlast does not
246                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
247                 read->read(nameMap);
248         
249         list = new ListVector(nameMap->getListVector());
250         RAbundVector* rabund = NULL;
251         
252         if(countfile != "") {
253             //map<string, int> nameMapCounts = m->readNames(namefile);
254             ct = new CountTable();
255             ct->readTable(countfile);
256             rabund = new RAbundVector();
257             createRabund(ct, list, rabund);
258         }else {
259             rabund = new RAbundVector(list->getRAbundVector());
260         }
261         
262                 
263                 //list = new ListVector(nameMap->getListVector());
264                 //rabund = new RAbundVector(list->getRAbundVector());
265                 
266                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
267                 
268                 start = time(NULL);
269                 oldList = *list;
270                 map<string, int> Seq2Bin;
271                 map<string, int> oldSeq2Bin;
272                 
273                 if (method == "furthest")               { tag = "fn";  }
274                 else if (method == "nearest")   { tag = "nn";  }
275                 else                                                    { tag = "an";  }
276                 
277         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
278         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
279         string listFileName = fileroot+ tag + ".";
280         if (countfile != "") { listFileName += "unique_"; }
281         listFileName += getOutputFileNameTag("list");
282         
283         if (countfile == "") {
284             m->openOutputFile(sabundFileName,   sabundFile);
285             m->openOutputFile(rabundFileName,   rabundFile);
286         }
287                 m->openOutputFile(listFileName, listFile);
288                 
289                 if (m->control_pressed) { 
290                         delete nameMap; delete read; delete list; delete rabund; 
291                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
292                         outputTypes.clear();
293                         return 0; 
294                 }
295                 
296                 double saveCutoff = cutoff;
297                 
298                 if (!hclusterWanted) {
299                         //get distmatrix and overlap
300                         SparseDistanceMatrix* distMatrix = read->getDistMatrix();
301                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
302                         delete read;
303                 
304                         //create cluster
305                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
306                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
307                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
308                         cluster->setMapWanted(true);
309                         Seq2Bin = cluster->getSeqtoBin();
310                         oldSeq2Bin = Seq2Bin;
311                         
312                         if (m->control_pressed) { 
313                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
314                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
315                                 outputTypes.clear();
316                                 return 0; 
317                         }
318                 
319                         //cluster using cluster classes
320                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
321                                 
322                                 cluster->update(cutoff);
323                                 
324                                 if (m->control_pressed) { 
325                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
326                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
327                                         outputTypes.clear();
328                                         return 0; 
329                                 }
330                                 
331                                 float dist = distMatrix->getSmallDist();
332                                 float rndDist;
333                                 if (hard) {
334                                         rndDist = m->ceilDist(dist, precision); 
335                                 }else{
336                                         rndDist = m->roundDist(dist, precision); 
337                                 }
338                                 
339                                 if(previousDist <= 0.0000 && dist != previousDist){
340                                         oldList.setLabel("unique");
341                                         printData(&oldList);
342                                 }
343                                 else if(rndDist != rndPreviousDist){
344                                         if (merge) {
345                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
346                                                 
347                                                 if (m->control_pressed) { 
348                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
349                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
350                                                         outputTypes.clear();
351                                                         return 0; 
352                                                 }
353                                                 
354                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
355                                                 printData(temp);
356                                                 delete temp;
357                                         }else{
358                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
359                                                 printData(&oldList);
360                                         }
361                                 }
362         
363                                 previousDist = dist;
364                                 rndPreviousDist = rndDist;
365                                 oldList = *list;
366                                 Seq2Bin = cluster->getSeqtoBin();
367                                 oldSeq2Bin = Seq2Bin;
368                         }
369                         
370                         if(previousDist <= 0.0000){
371                                 oldList.setLabel("unique");
372                                 printData(&oldList);
373                         }
374                         else if(rndPreviousDist<cutoff){
375                                 if (merge) {
376                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
377                                         
378                                         if (m->control_pressed) { 
379                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
380                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
381                                                         outputTypes.clear();
382                                                         return 0; 
383                                         }
384                                         
385                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
386                                         printData(temp);
387                                         delete temp;
388                                 }else{
389                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
390                                         printData(&oldList);
391                                 }
392                         }
393                         
394                         //free memory
395                         overlapMatrix.clear();
396                         delete distMatrix;
397                         delete cluster;
398                         
399                 }else { //use hcluster to cluster
400                         //get distmatrix and overlap
401                         overlapFile = read->getOverlapFile();
402                         distFile = read->getDistFile(); 
403                         delete read;
404                 
405                         //sort the distance and overlap files
406                         sortHclusterFiles(distFile, overlapFile);
407                         
408                         if (m->control_pressed) { 
409                                 delete nameMap;  delete list; delete rabund; 
410                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
411                                 outputTypes.clear();
412                                 return 0; 
413                         }
414                 
415                         //create cluster
416                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
417                         hcluster->setMapWanted(true);
418                         Seq2Bin = cluster->getSeqtoBin();
419                         oldSeq2Bin = Seq2Bin;
420                         
421                         vector<seqDist> seqs; seqs.resize(1); // to start loop
422                         //ifstream inHcluster;
423                         //m->openInputFile(distFile, inHcluster);
424                         
425                         if (m->control_pressed) { 
426                                 delete nameMap;  delete list; delete rabund; delete hcluster;
427                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
428                                 outputTypes.clear();
429                                 return 0; 
430                         }
431
432                         while (seqs.size() != 0){
433                 
434                                 seqs = hcluster->getSeqs();
435                                 
436                                 //to account for cutoff change in average neighbor
437                                 if (seqs.size() != 0) {
438                                         if (seqs[0].dist > cutoff) { break; }
439                                 }
440                                 
441                                 if (m->control_pressed) { 
442                                         delete nameMap;  delete list; delete rabund; delete hcluster;
443                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
444                                         m->mothurRemove(distFile);
445                                         m->mothurRemove(overlapFile);
446                                         outputTypes.clear();
447                                         return 0; 
448                                 }
449                                 
450                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
451                                         
452                                         if (seqs[i].seq1 != seqs[i].seq2) {
453                 
454                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
455                                                 
456                                                 if (m->control_pressed) { 
457                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
458                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
459                                                         m->mothurRemove(distFile);
460                                                         m->mothurRemove(overlapFile);
461                                                         outputTypes.clear();
462                                                         return 0; 
463                                                 }
464         
465                                                 float rndDist;
466                                                 if (hard) {
467                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
468                                                 }else{
469                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
470                                                 }
471                                                                                                 
472                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
473                                                         oldList.setLabel("unique");
474                                                         printData(&oldList);
475                                                 }
476                                                 else if((rndDist != rndPreviousDist)){
477                                                         if (merge) {
478                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
479                                                                 
480                                                                 if (m->control_pressed) { 
481                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
482                                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
483                                                                         m->mothurRemove(distFile);
484                                                                         m->mothurRemove(overlapFile);
485                                                                         outputTypes.clear();
486                                                                         return 0; 
487                                                                 }
488
489                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
490                                                                 printData(temp);
491                                                                 delete temp;
492                                                         }else{
493                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
494                                                                 printData(&oldList);
495                                                         }
496                                                 }
497                                                 
498                                                 previousDist = seqs[i].dist;
499                                                 rndPreviousDist = rndDist;
500                                                 oldList = *list;
501                                                 Seq2Bin = cluster->getSeqtoBin();
502                                                 oldSeq2Bin = Seq2Bin;
503                                         }
504                                 }
505                         }
506                         //inHcluster.close();
507                         
508                         if(previousDist <= 0.0000){
509                                 oldList.setLabel("unique");
510                                 printData(&oldList);
511                         }
512                         else if(rndPreviousDist<cutoff){
513                                 if (merge) {
514                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
515                                         
516                                         if (m->control_pressed) { 
517                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
518                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
519                                                         m->mothurRemove(distFile);
520                                                         m->mothurRemove(overlapFile);
521                                                         outputTypes.clear();
522                                                         return 0; 
523                                         }
524                                         
525                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
526                                         printData(temp);
527                                         delete temp;
528                                 }else{
529                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
530                                         printData(&oldList);
531                                 }
532                         }
533                         
534                         delete hcluster;
535                         m->mothurRemove(distFile);
536                         m->mothurRemove(overlapFile);
537                 }
538                 
539                 delete list;
540                 delete rabund;
541                 listFile.close();
542         if (countfile == "") {
543             sabundFile.close();
544             rabundFile.close();
545         }
546                 if (m->control_pressed) { 
547                         delete nameMap; 
548                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
549                         outputTypes.clear();
550                         return 0; 
551                 }
552                 
553                 m->mothurOutEndLine();
554                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
555                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
556                 if (countfile == "") {
557             m->mothurOut(rabundFileName); m->mothurOutEndLine();        outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
558             m->mothurOut(sabundFileName); m->mothurOutEndLine();        outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
559         }
560                 m->mothurOutEndLine();
561                 
562                 if (saveCutoff != cutoff) { 
563                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
564                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
565                         
566                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
567                 }
568                 
569                 //set list file as new current listfile
570                 string current = "";
571                 itTypes = outputTypes.find("list");
572                 if (itTypes != outputTypes.end()) {
573                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
574                 }
575                 
576                 //set rabund file as new current rabundfile
577                 itTypes = outputTypes.find("rabund");
578                 if (itTypes != outputTypes.end()) {
579                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
580                 }
581                 
582                 //set sabund file as new current sabundfile
583                 itTypes = outputTypes.find("sabund");
584                 if (itTypes != outputTypes.end()) {
585                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
586                 }
587                 
588                 
589                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
590                         
591                 return 0;
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "MGClusterCommand", "execute");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599 void MGClusterCommand::printData(ListVector* mergedList){
600         try {
601                 mergedList->print(listFile);
602         SAbundVector sabund = mergedList->getSAbundVector();
603         
604         if (countfile == "") {
605             mergedList->getRAbundVector().print(rabundFile);
606             sabund.print(sabundFile);
607         }
608
609                 sabund.print(cout);
610         }
611         catch(exception& e) {
612                 m->errorOut(e, "MGClusterCommand", "printData");
613                 exit(1);
614         }
615 }
616 //**********************************************************************************************************************
617 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
618 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
619 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
620         try {
621                 //create new listvector so you don't overwrite the clustering
622                 ListVector* newList = new ListVector(oldList);
623
624                 bool done = false;
625                 ifstream inOverlap;
626                 int count = 0;
627                 
628                 if (hclusterWanted) {  
629                         m->openInputFile(overlapFile, inOverlap);  
630                         if (inOverlap.eof()) {  done = true;  }
631                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
632                 
633                 while (!done) {
634                         if (m->control_pressed) { 
635                                 if (hclusterWanted) {   inOverlap.close();  }           
636                                 return newList;
637                         }
638                         
639                         //get next overlap
640                         seqDist overlapNode;
641                         if (!hclusterWanted) {  
642                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
643                                         overlapNode = overlapMatrix[count];
644                                         count++;
645                                 }else { break; }
646                         }else { 
647                                 if (!inOverlap.eof()) {
648                                         string firstName, secondName;
649                                         float overlapDistance;
650                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
651                                         
652                                         //commented out because we check this in readblast already
653                                         //map<string,int>::iterator itA = nameMap->find(firstName);
654                                         //map<string,int>::iterator itB = nameMap->find(secondName);
655                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
656                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
657                                         
658                                         //overlapNode.seq1 = itA->second;
659                                         //overlapNode.seq2 = itB->second;
660                                         overlapNode.seq1 = nameMap->get(firstName);
661                                         overlapNode.seq2 = nameMap->get(secondName);
662                                         overlapNode.dist = overlapDistance;
663                                 }else { inOverlap.close(); break; }
664                         } 
665                 
666                         if (overlapNode.dist < dist) {
667                                 //get names of seqs that overlap
668                                 string name1 = nameMap->get(overlapNode.seq1);
669                                 string name2 = nameMap->get(overlapNode.seq2);
670                         
671                                 //use binInfo to find out if they are already in the same bin
672                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
673                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
674                                 
675                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
676                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
677
678                                 //int binKeep = itBin1->second;
679                                 //int binRemove = itBin2->second;
680                                 
681                                 int binKeep = binInfo[name1];
682                                 int binRemove = binInfo[name2];
683                         
684                                 //if not merge bins and update binInfo
685                                 if(binKeep != binRemove) {
686                 
687                                         //save names in old bin
688                                         string names = newList->get(binRemove);
689                 
690                                         //merge bins into name1s bin
691                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
692                                         newList->set(binRemove, "");    
693                                         
694                                         //update binInfo
695                                         while (names.find_first_of(',') != -1) { 
696                                                 //get name from bin
697                                                 string name = names.substr(0,names.find_first_of(','));
698                                                 //save name and bin number
699                                                 binInfo[name] = binKeep;
700                                                 names = names.substr(names.find_first_of(',')+1, names.length());
701                                         }
702                                         
703                                         //get last name
704                                         binInfo[names] = binKeep;
705                                 }
706                                 
707                         }else { done = true; }
708                 }
709                 
710                 //return listvector
711                 return newList;
712                                 
713         }
714         catch(exception& e) {
715                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
716                 exit(1);
717         }
718 }
719 //**********************************************************************************************************************
720 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
721         try {
722                 //sort distFile
723                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
724                 m->mothurRemove(unsortedDist);  //delete unsorted file
725                 distFile = sortedDistFile;
726                 
727                 //sort overlap file
728                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
729                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
730                 overlapFile = sortedOverlapFile;
731         }
732         catch(exception& e) {
733                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
734                 exit(1);
735         }
736 }
737
738 //**********************************************************************************************************************
739
740 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
741     try {
742         //vector<string> names = ct.getNamesOfSeqs();
743
744         //for ( int i; i < ct.getNumGroups(); i++ ) {    rav.push_back( ct.getNumSeqs(names[i]) );    }
745         //return rav;
746         
747         for(int i = 0; i < list->getNumBins(); i++) { 
748            vector<string> binNames;
749            string bin = list->get(i);
750            m->splitAtComma(bin, binNames);
751            int total = 0;
752            for (int j = 0; j < binNames.size(); j++) { 
753                total += ct->getNumSeqs(binNames[j]);
754            }
755            rabund->push_back(total);   
756        }
757         
758         
759     }
760     catch(exception& e) {
761                 m->errorOut(e, "MGClusterCommand", "createRabund");
762                 exit(1);
763         }
764     
765 }
766
767 //**********************************************************************************************************************
768
769