]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){       
41         try {
42                 string helpString = "";
43                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47                 helpString += "The precision parameter's default value is 100. \n";
48                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
49                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
51                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
52                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
53                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54                 helpString += "The mgcluster command should be in the following format: \n";
55                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MGClusterCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
66         try {
67         string outputFileName = "";
68                 map<string, vector<string> >::iterator it;
69         
70         //is this a type this command creates
71         it = outputTypes.find(type);
72         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73         else {
74             if (type == "list") {  outputFileName =  "list"; }
75             else if (type == "rabund") {  outputFileName =  "rabund"; }
76             else if (type == "sabund") {  outputFileName =  "sabund"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){   
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["list"] = tempOutNames;
93                 outputTypes["rabund"] = tempOutNames;
94                 outputTypes["sabund"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
103         try {
104                 abort = false; calledHelp = false;   
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string, string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter;
117                         map<string,string>::iterator it;
118                 
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         //initialize outputTypes
125                         vector<string> tempOutNames;
126                         outputTypes["list"] = tempOutNames;
127                         outputTypes["rabund"] = tempOutNames;
128                         outputTypes["sabund"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("blast");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
141                                 }
142                                 
143                                 it = parameters.find("name");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
149                                 }
150                         }
151
152                         
153                         //check for required parameters
154                         blastfile = validParameter.validFile(parameters, "blast", true);
155                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
156                         else if (blastfile == "not found") { blastfile = ""; }
157                         
158                         //if the user changes the output directory command factory will send this info to us in the output parameter 
159                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
160                                 outputDir = ""; 
161                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
162                         }
163                         
164                         namefile = validParameter.validFile(parameters, "name", true);
165                         if (namefile == "not open") { abort = true; }   
166                         else if (namefile == "not found") { namefile = ""; }
167                         else { m->setNameFile(namefile); }
168                         
169                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
170                         
171                         //check for optional parameter and set defaults
172                         string temp;
173             temp = validParameter.validFile(parameters, "large", false);                        if (temp == "not found") { temp = "false"; }            
174                         large = m->isTrue(temp); 
175             
176                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
177                         precisionLength = temp.length();
178                         m->mothurConvert(temp, precision); 
179                         
180                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
181                         m->mothurConvert(temp, cutoff); 
182                         cutoff += (5 / (precision * 10.0));
183                         
184                         method = validParameter.validFile(parameters, "method", false);
185                         if (method == "not found") { method = "average"; }
186                         
187                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
188                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
189
190                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
191                         m->mothurConvert(temp, length); 
192                         
193                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
194                         m->mothurConvert(temp, penalty); 
195                         
196                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
197                         minWanted = m->isTrue(temp); 
198                         
199                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
200                         merge = m->isTrue(temp); 
201                         
202                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
203                         hclusterWanted = m->isTrue(temp); 
204                         
205                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
206                         hard = m->isTrue(temp);            
207                 }
208
209         }
210         catch(exception& e) {
211                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
212                 exit(1);
213         }
214 }
215 //**********************************************************************************************************************
216 int MGClusterCommand::execute(){
217         try {
218                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
219                 
220                 //read names file
221                 if (namefile != "") {
222                         nameMap = new NameAssignment(namefile);
223                         nameMap->readMap();
224                 }else{ nameMap= new NameAssignment(); }
225                 
226                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
227                 string tag = "";
228                 time_t start;
229                 float previousDist = 0.00000;
230                 float rndPreviousDist = 0.00000; 
231         
232                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
233                 //must remember to delete those objects here since readBlast does not
234                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
235                 read->read(nameMap);
236         
237         list = new ListVector(nameMap->getListVector());
238         RAbundVector* rabund = NULL;
239         
240         if(large) {
241             map<string, int> nameMapCounts = m->readNames(namefile);
242             createRabund(nameMapCounts);
243             rabund = &rav;
244         }else {
245             rabund = new RAbundVector(list->getRAbundVector());
246         }
247         
248                 
249                 //list = new ListVector(nameMap->getListVector());
250                 //rabund = new RAbundVector(list->getRAbundVector());
251                 
252                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
253                 
254                 start = time(NULL);
255                 oldList = *list;
256                 map<string, int> Seq2Bin;
257                 map<string, int> oldSeq2Bin;
258                 
259                 if (method == "furthest")               { tag = "fn";  }
260                 else if (method == "nearest")   { tag = "nn";  }
261                 else                                                    { tag = "an";  }
262                 
263         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
264         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
265         string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
266         
267                 m->openOutputFile(sabundFileName,       sabundFile);
268                 m->openOutputFile(rabundFileName,       rabundFile);
269                 m->openOutputFile(listFileName, listFile);
270                 
271                 if (m->control_pressed) { 
272                         delete nameMap; delete read; delete list; delete rabund; 
273                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
274                         outputTypes.clear();
275                         return 0; 
276                 }
277                 
278                 double saveCutoff = cutoff;
279                 
280                 if (!hclusterWanted) {
281                         //get distmatrix and overlap
282                         SparseMatrix* distMatrix = read->getDistMatrix();
283                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
284                         delete read;
285                 
286                         //create cluster
287                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
288                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
289                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
290                         cluster->setMapWanted(true);
291                         Seq2Bin = cluster->getSeqtoBin();
292                         oldSeq2Bin = Seq2Bin;
293                         
294                         if (m->control_pressed) { 
295                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
296                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
297                                 outputTypes.clear();
298                                 return 0; 
299                         }
300                 
301                         //cluster using cluster classes
302                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
303                                 
304                                 cluster->update(cutoff);
305                                 
306                                 if (m->control_pressed) { 
307                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
308                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
309                                         outputTypes.clear();
310                                         return 0; 
311                                 }
312                                 
313                                 float dist = distMatrix->getSmallDist();
314                                 float rndDist;
315                                 if (hard) {
316                                         rndDist = m->ceilDist(dist, precision); 
317                                 }else{
318                                         rndDist = m->roundDist(dist, precision); 
319                                 }
320                                 
321                                 if(previousDist <= 0.0000 && dist != previousDist){
322                                         oldList.setLabel("unique");
323                                         printData(&oldList);
324                                 }
325                                 else if(rndDist != rndPreviousDist){
326                                         if (merge) {
327                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
328                                                 
329                                                 if (m->control_pressed) { 
330                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
331                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
332                                                         outputTypes.clear();
333                                                         return 0; 
334                                                 }
335                                                 
336                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
337                                                 printData(temp);
338                                                 delete temp;
339                                         }else{
340                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
341                                                 printData(&oldList);
342                                         }
343                                 }
344         
345                                 previousDist = dist;
346                                 rndPreviousDist = rndDist;
347                                 oldList = *list;
348                                 Seq2Bin = cluster->getSeqtoBin();
349                                 oldSeq2Bin = Seq2Bin;
350                         }
351                         
352                         if(previousDist <= 0.0000){
353                                 oldList.setLabel("unique");
354                                 printData(&oldList);
355                         }
356                         else if(rndPreviousDist<cutoff){
357                                 if (merge) {
358                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
359                                         
360                                         if (m->control_pressed) { 
361                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
362                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
363                                                         outputTypes.clear();
364                                                         return 0; 
365                                         }
366                                         
367                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
368                                         printData(temp);
369                                         delete temp;
370                                 }else{
371                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
372                                         printData(&oldList);
373                                 }
374                         }
375                         
376                         //free memory
377                         overlapMatrix.clear();
378                         delete distMatrix;
379                         delete cluster;
380                         
381                 }else { //use hcluster to cluster
382                         //get distmatrix and overlap
383                         overlapFile = read->getOverlapFile();
384                         distFile = read->getDistFile(); 
385                         delete read;
386                 
387                         //sort the distance and overlap files
388                         sortHclusterFiles(distFile, overlapFile);
389                         
390                         if (m->control_pressed) { 
391                                 delete nameMap;  delete list; delete rabund; 
392                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
393                                 outputTypes.clear();
394                                 return 0; 
395                         }
396                 
397                         //create cluster
398                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
399                         hcluster->setMapWanted(true);
400                         Seq2Bin = cluster->getSeqtoBin();
401                         oldSeq2Bin = Seq2Bin;
402                         
403                         vector<seqDist> seqs; seqs.resize(1); // to start loop
404                         //ifstream inHcluster;
405                         //m->openInputFile(distFile, inHcluster);
406                         
407                         if (m->control_pressed) { 
408                                 delete nameMap;  delete list; delete rabund; delete hcluster;
409                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
410                                 outputTypes.clear();
411                                 return 0; 
412                         }
413
414                         while (seqs.size() != 0){
415                 
416                                 seqs = hcluster->getSeqs();
417                                 
418                                 //to account for cutoff change in average neighbor
419                                 if (seqs.size() != 0) {
420                                         if (seqs[0].dist > cutoff) { break; }
421                                 }
422                                 
423                                 if (m->control_pressed) { 
424                                         delete nameMap;  delete list; delete rabund; delete hcluster;
425                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
426                                         m->mothurRemove(distFile);
427                                         m->mothurRemove(overlapFile);
428                                         outputTypes.clear();
429                                         return 0; 
430                                 }
431                                 
432                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
433                                         
434                                         if (seqs[i].seq1 != seqs[i].seq2) {
435                 
436                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
437                                                 
438                                                 if (m->control_pressed) { 
439                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
440                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
441                                                         m->mothurRemove(distFile);
442                                                         m->mothurRemove(overlapFile);
443                                                         outputTypes.clear();
444                                                         return 0; 
445                                                 }
446         
447                                                 float rndDist;
448                                                 if (hard) {
449                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
450                                                 }else{
451                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
452                                                 }
453                                                                                                 
454                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
455                                                         oldList.setLabel("unique");
456                                                         printData(&oldList);
457                                                 }
458                                                 else if((rndDist != rndPreviousDist)){
459                                                         if (merge) {
460                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
461                                                                 
462                                                                 if (m->control_pressed) { 
463                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
464                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
465                                                                         m->mothurRemove(distFile);
466                                                                         m->mothurRemove(overlapFile);
467                                                                         outputTypes.clear();
468                                                                         return 0; 
469                                                                 }
470
471                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
472                                                                 printData(temp);
473                                                                 delete temp;
474                                                         }else{
475                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
476                                                                 printData(&oldList);
477                                                         }
478                                                 }
479                                                 
480                                                 previousDist = seqs[i].dist;
481                                                 rndPreviousDist = rndDist;
482                                                 oldList = *list;
483                                                 Seq2Bin = cluster->getSeqtoBin();
484                                                 oldSeq2Bin = Seq2Bin;
485                                         }
486                                 }
487                         }
488                         //inHcluster.close();
489                         
490                         if(previousDist <= 0.0000){
491                                 oldList.setLabel("unique");
492                                 printData(&oldList);
493                         }
494                         else if(rndPreviousDist<cutoff){
495                                 if (merge) {
496                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
497                                         
498                                         if (m->control_pressed) { 
499                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
500                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
501                                                         m->mothurRemove(distFile);
502                                                         m->mothurRemove(overlapFile);
503                                                         outputTypes.clear();
504                                                         return 0; 
505                                         }
506                                         
507                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
508                                         printData(temp);
509                                         delete temp;
510                                 }else{
511                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
512                                         printData(&oldList);
513                                 }
514                         }
515                         
516                         delete hcluster;
517                         m->mothurRemove(distFile);
518                         m->mothurRemove(overlapFile);
519                 }
520                 
521                 delete list;
522                 if (!large) {delete rabund;}
523                 listFile.close();
524                 sabundFile.close();
525                 rabundFile.close();
526         
527                 if (m->control_pressed) { 
528                         delete nameMap; 
529                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
530                         outputTypes.clear();
531                         return 0; 
532                 }
533                 
534                 m->mothurOutEndLine();
535                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
536                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
537                 m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
538                 m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
539                 m->mothurOutEndLine();
540                 
541                 if (saveCutoff != cutoff) { 
542                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
543                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
544                         
545                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
546                 }
547                 
548                 //set list file as new current listfile
549                 string current = "";
550                 itTypes = outputTypes.find("list");
551                 if (itTypes != outputTypes.end()) {
552                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
553                 }
554                 
555                 //set rabund file as new current rabundfile
556                 itTypes = outputTypes.find("rabund");
557                 if (itTypes != outputTypes.end()) {
558                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
559                 }
560                 
561                 //set sabund file as new current sabundfile
562                 itTypes = outputTypes.find("sabund");
563                 if (itTypes != outputTypes.end()) {
564                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
565                 }
566                 
567                 
568                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
569                         
570                 return 0;
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "MGClusterCommand", "execute");
574                 exit(1);
575         }
576 }
577 //**********************************************************************************************************************
578 void MGClusterCommand::printData(ListVector* mergedList){
579         try {
580                 mergedList->print(listFile);
581                 mergedList->getRAbundVector().print(rabundFile);
582                 
583                 SAbundVector sabund = mergedList->getSAbundVector();
584
585                 sabund.print(cout);
586                 sabund.print(sabundFile);
587         }
588         catch(exception& e) {
589                 m->errorOut(e, "MGClusterCommand", "printData");
590                 exit(1);
591         }
592 }
593 //**********************************************************************************************************************
594 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
595 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
596 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
597         try {
598                 //create new listvector so you don't overwrite the clustering
599                 ListVector* newList = new ListVector(oldList);
600
601                 bool done = false;
602                 ifstream inOverlap;
603                 int count = 0;
604                 
605                 if (hclusterWanted) {  
606                         m->openInputFile(overlapFile, inOverlap);  
607                         if (inOverlap.eof()) {  done = true;  }
608                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
609                 
610                 while (!done) {
611                         if (m->control_pressed) { 
612                                 if (hclusterWanted) {   inOverlap.close();  }           
613                                 return newList;
614                         }
615                         
616                         //get next overlap
617                         seqDist overlapNode;
618                         if (!hclusterWanted) {  
619                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
620                                         overlapNode = overlapMatrix[count];
621                                         count++;
622                                 }else { break; }
623                         }else { 
624                                 if (!inOverlap.eof()) {
625                                         string firstName, secondName;
626                                         float overlapDistance;
627                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
628                                         
629                                         //commented out because we check this in readblast already
630                                         //map<string,int>::iterator itA = nameMap->find(firstName);
631                                         //map<string,int>::iterator itB = nameMap->find(secondName);
632                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
633                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
634                                         
635                                         //overlapNode.seq1 = itA->second;
636                                         //overlapNode.seq2 = itB->second;
637                                         overlapNode.seq1 = nameMap->get(firstName);
638                                         overlapNode.seq2 = nameMap->get(secondName);
639                                         overlapNode.dist = overlapDistance;
640                                 }else { inOverlap.close(); break; }
641                         } 
642                 
643                         if (overlapNode.dist < dist) {
644                                 //get names of seqs that overlap
645                                 string name1 = nameMap->get(overlapNode.seq1);
646                                 string name2 = nameMap->get(overlapNode.seq2);
647                         
648                                 //use binInfo to find out if they are already in the same bin
649                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
650                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
651                                 
652                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
653                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
654
655                                 //int binKeep = itBin1->second;
656                                 //int binRemove = itBin2->second;
657                                 
658                                 int binKeep = binInfo[name1];
659                                 int binRemove = binInfo[name2];
660                         
661                                 //if not merge bins and update binInfo
662                                 if(binKeep != binRemove) {
663                 
664                                         //save names in old bin
665                                         string names = newList->get(binRemove);
666                 
667                                         //merge bins into name1s bin
668                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
669                                         newList->set(binRemove, "");    
670                                         
671                                         //update binInfo
672                                         while (names.find_first_of(',') != -1) { 
673                                                 //get name from bin
674                                                 string name = names.substr(0,names.find_first_of(','));
675                                                 //save name and bin number
676                                                 binInfo[name] = binKeep;
677                                                 names = names.substr(names.find_first_of(',')+1, names.length());
678                                         }
679                                         
680                                         //get last name
681                                         binInfo[names] = binKeep;
682                                 }
683                                 
684                         }else { done = true; }
685                 }
686                 
687                 //return listvector
688                 return newList;
689                                 
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
693                 exit(1);
694         }
695 }
696 //**********************************************************************************************************************
697 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
698         try {
699                 //sort distFile
700                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
701                 m->mothurRemove(unsortedDist);  //delete unsorted file
702                 distFile = sortedDistFile;
703                 
704                 //sort overlap file
705                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
706                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
707                 overlapFile = sortedOverlapFile;
708         }
709         catch(exception& e) {
710                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
711                 exit(1);
712         }
713 }
714
715 //**********************************************************************************************************************
716
717 void MGClusterCommand::createRabund(map<string, int> nameMapCounts){
718     try {
719         //RAbundVector rav;
720         map<string,int>::iterator it;
721         //it = nameMapCounts.begin();
722         //for(int i = 0; i < list->getNumBins(); i++) {   rav.push_back((*it).second); it++;    }
723         for ( it=nameMapCounts.begin(); it!=nameMapCounts.end(); it++ ) {    rav.push_back( it->second );    }
724         //return rav;
725     }
726     catch(exception& e) {
727                 m->errorOut(e, "MGClusterCommand", "createRabund");
728                 exit(1);
729         }
730     
731 }
732
733 //**********************************************************************************************************************
734
735