]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
adding labels to list file.
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
17                 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
26         CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
27                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "MGClusterCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string MGClusterCommand::getHelpString(){       
42         try {
43                 string helpString = "";
44                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty, adjust and hcluster. The blast parameter is required.\n";
45                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
46                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
47                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
48                 helpString += "The precision parameter's default value is 100. \n";
49                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
50                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
51                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
52         helpString += "The adjust parameter is used to handle missing distances.  If you set a cutoff, adjust=f by default.  If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method.  Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
53                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
54                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
55                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
56                 helpString += "The mgcluster command should be in the following format: \n";
57                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "MGClusterCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string MGClusterCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } 
72         else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; } 
73         else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
74         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
75         
76         return pattern;
77     }
78     catch(exception& e) {
79         m->errorOut(e, "MGClusterCommand", "getOutputPattern");
80         exit(1);
81     }
82 }
83 //*******************************************************************************************************************
84 MGClusterCommand::MGClusterCommand(){   
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["list"] = tempOutNames;
90                 outputTypes["rabund"] = tempOutNames;
91                 outputTypes["sabund"] = tempOutNames;
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
95                 exit(1);
96         }
97 }
98 //**********************************************************************************************************************
99 MGClusterCommand::MGClusterCommand(string option) {
100         try {
101                 abort = false; calledHelp = false;   
102                 
103                 //allow user to run help
104                 if(option == "help") { help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         
113                         ValidParameters validParameter;
114                         map<string,string>::iterator it;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["list"] = tempOutNames;
124                         outputTypes["rabund"] = tempOutNames;
125                         outputTypes["sabund"] = tempOutNames;
126                         
127                         //if the user changes the input directory command factory will send this info to us in the output parameter 
128                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
129                         if (inputDir == "not found"){   inputDir = "";          }
130                         else {
131                                 string path;
132                                 it = parameters.find("blast");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
138                                 }
139                                 
140                                 it = parameters.find("name");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
146                                 }
147                 
148                 it = parameters.find("count");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
154                                 }
155                         }
156
157                         
158                         //check for required parameters
159                         blastfile = validParameter.validFile(parameters, "blast", true);
160                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
161                         else if (blastfile == "not found") { blastfile = ""; }
162                         
163                         //if the user changes the output directory command factory will send this info to us in the output parameter 
164                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
165                                 outputDir = ""; 
166                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
167                         }
168                         
169                         namefile = validParameter.validFile(parameters, "name", true);
170                         if (namefile == "not open") { abort = true; }   
171                         else if (namefile == "not found") { namefile = ""; }
172                         else { m->setNameFile(namefile); }
173             
174             countfile = validParameter.validFile(parameters, "count", true);
175                         if (countfile == "not open") { abort = true; }  
176                         else if (countfile == "not found") { countfile = ""; }
177             else { m->setCountTableFile(countfile); }
178             
179             if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
180                         
181                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
182                         
183                         //check for optional parameter and set defaults
184                         string temp;
185             temp = validParameter.validFile(parameters, "precision", false);            if (temp == "not found") { temp = "100"; }
186                         precisionLength = temp.length();
187                         m->mothurConvert(temp, precision); 
188                         
189             cutoffSet = false;
190                         temp = validParameter.validFile(parameters, "cutoff", false);
191             if (temp == "not found") { temp = "0.70"; }
192             else { cutoffSet = true;  }
193                         m->mothurConvert(temp, cutoff); 
194                         cutoff += (5 / (precision * 10.0));
195                         
196                         method = validParameter.validFile(parameters, "method", false);
197                         if (method == "not found") { method = "average"; }
198                         
199                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
200                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
201
202                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
203                         m->mothurConvert(temp, length); 
204                         
205                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
206                         m->mothurConvert(temp, penalty); 
207                         
208                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
209                         minWanted = m->isTrue(temp); 
210                         
211                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
212                         merge = m->isTrue(temp); 
213                         
214                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
215                         hclusterWanted = m->isTrue(temp); 
216                         
217                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
218                         hard = m->isTrue(temp);
219             
220             temp = validParameter.validFile(parameters, "adjust", false);                               if (temp == "not found") { if (cutoffSet) { temp = "F"; }else { temp="T"; } }
221             if (m->isNumeric1(temp))    { m->mothurConvert(temp, adjust);   }
222             else if (m->isTrue(temp))   { adjust = 1.0;                     }
223             else                        { adjust = -1.0;                    }
224                 }
225
226         }
227         catch(exception& e) {
228                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
229                 exit(1);
230         }
231 }
232 //**********************************************************************************************************************
233 int MGClusterCommand::execute(){
234         try {
235                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
236                 
237                 //read names file
238                 if (namefile != "") {
239                         nameMap = new NameAssignment(namefile);
240                         nameMap->readMap();
241                 }else if (countfile != "") {
242             ct = new CountTable();
243             ct->readTable(countfile, false, false);
244             nameMap= new NameAssignment();
245             vector<string> tempNames = ct->getNamesOfSeqs();
246             for (int i = 0; i < tempNames.size(); i++) {  nameMap->push_back(tempNames[i]);  }
247         }else{ nameMap= new NameAssignment(); }
248                 
249                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
250                 string tag = "";
251                 time_t start;
252                 float previousDist = 0.00000;
253                 float rndPreviousDist = 0.00000; 
254         
255                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
256                 //must remember to delete those objects here since readBlast does not
257                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
258                 read->read(nameMap);
259         
260         list = new ListVector(nameMap->getListVector());
261         RAbundVector* rabund = NULL;
262         
263         if(countfile != "") {
264             rabund = new RAbundVector();
265             createRabund(ct, list, rabund);
266         }else {
267             rabund = new RAbundVector(list->getRAbundVector());
268         }
269         
270                 
271                 //list = new ListVector(nameMap->getListVector());
272                 //rabund = new RAbundVector(list->getRAbundVector());
273                 
274                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
275                 
276                 start = time(NULL);
277                 oldList = *list;
278                 map<string, int> Seq2Bin;
279                 map<string, int> oldSeq2Bin;
280                 
281                 if (method == "furthest")               { tag = "fn";  }
282                 else if (method == "nearest")   { tag = "nn";  }
283                 else                                                    { tag = "an";  }
284                 
285         map<string, string> variables; 
286         variables["[filename]"] = fileroot;
287         variables["[clustertag]"] = tag;
288         string sabundFileName = getOutputFileName("sabund", variables);
289         string rabundFileName = getOutputFileName("rabund", variables);
290         if (countfile != "") { variables["[tag2]"] = "unique_list"; }
291         string listFileName = getOutputFileName("list", variables);
292         
293         if (countfile == "") {
294             m->openOutputFile(sabundFileName,   sabundFile);
295             m->openOutputFile(rabundFileName,   rabundFile);
296         }
297                 m->openOutputFile(listFileName, listFile);
298         list->printHeaders(listFile);
299                 
300                 if (m->control_pressed) { 
301                         delete nameMap; delete read; delete list; delete rabund; 
302                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
303                         outputTypes.clear();
304                         return 0; 
305                 }
306                 
307                 double saveCutoff = cutoff;
308                 
309                 if (!hclusterWanted) {
310                         //get distmatrix and overlap
311                         SparseDistanceMatrix* distMatrix = read->getDistMatrix();
312                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
313                         delete read;
314                 
315                         //create cluster
316                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
317                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
318                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
319                         cluster->setMapWanted(true);
320                         Seq2Bin = cluster->getSeqtoBin();
321                         oldSeq2Bin = Seq2Bin;
322                         
323                         if (m->control_pressed) { 
324                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
325                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
326                                 outputTypes.clear();
327                                 return 0; 
328                         }
329             
330             
331                         //cluster using cluster classes
332                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
333                                 
334                 if (m->debug) {  cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
335                 
336                                 cluster->update(cutoff);
337                                 
338                                 if (m->control_pressed) { 
339                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
340                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
341                                         outputTypes.clear();
342                                         return 0; 
343                                 }
344                                 
345                                 float dist = distMatrix->getSmallDist();
346                                 float rndDist;
347                                 if (hard) {
348                                         rndDist = m->ceilDist(dist, precision); 
349                                 }else{
350                                         rndDist = m->roundDist(dist, precision); 
351                                 }
352                                 
353                                 if(previousDist <= 0.0000 && dist != previousDist){
354                                         oldList.setLabel("unique");
355                                         printData(&oldList);
356                                 }
357                                 else if(rndDist != rndPreviousDist){
358                                         if (merge) {
359                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
360                                                 
361                                                 if (m->control_pressed) { 
362                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
363                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
364                                                         outputTypes.clear();
365                                                         return 0; 
366                                                 }
367                                                 
368                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
369                                                 printData(temp);
370                                                 delete temp;
371                                         }else{
372                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
373                                                 printData(&oldList);
374                                         }
375                                 }
376         
377                                 previousDist = dist;
378                                 rndPreviousDist = rndDist;
379                                 oldList = *list;
380                                 Seq2Bin = cluster->getSeqtoBin();
381                                 oldSeq2Bin = Seq2Bin;
382                         }
383                         
384                         if(previousDist <= 0.0000){
385                                 oldList.setLabel("unique");
386                                 printData(&oldList);
387                         }
388                         else if(rndPreviousDist<cutoff){
389                                 if (merge) {
390                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
391                                         
392                                         if (m->control_pressed) { 
393                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
394                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
395                                                         outputTypes.clear();
396                                                         return 0; 
397                                         }
398                                         
399                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
400                                         printData(temp);
401                                         delete temp;
402                                 }else{
403                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
404                                         printData(&oldList);
405                                 }
406                         }
407                         
408                         //free memory
409                         overlapMatrix.clear();
410                         delete distMatrix;
411                         delete cluster;
412                         
413                 }else { //use hcluster to cluster
414                         //get distmatrix and overlap
415                         overlapFile = read->getOverlapFile();
416                         distFile = read->getDistFile(); 
417                         delete read;
418                 
419                         //sort the distance and overlap files
420                         sortHclusterFiles(distFile, overlapFile);
421                         
422                         if (m->control_pressed) { 
423                                 delete nameMap;  delete list; delete rabund; 
424                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
425                                 outputTypes.clear();
426                                 return 0; 
427                         }
428                 
429                         //create cluster
430                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
431                         hcluster->setMapWanted(true);
432                         Seq2Bin = cluster->getSeqtoBin();
433                         oldSeq2Bin = Seq2Bin;
434                         
435                         vector<seqDist> seqs; seqs.resize(1); // to start loop
436                         //ifstream inHcluster;
437                         //m->openInputFile(distFile, inHcluster);
438                         
439                         if (m->control_pressed) { 
440                                 delete nameMap;  delete list; delete rabund; delete hcluster;
441                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
442                                 outputTypes.clear();
443                                 return 0; 
444                         }
445
446                         while (seqs.size() != 0){
447                 
448                                 seqs = hcluster->getSeqs();
449                                 
450                                 //to account for cutoff change in average neighbor
451                                 if (seqs.size() != 0) {
452                                         if (seqs[0].dist > cutoff) { break; }
453                                 }
454                                 
455                                 if (m->control_pressed) { 
456                                         delete nameMap;  delete list; delete rabund; delete hcluster;
457                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
458                                         m->mothurRemove(distFile);
459                                         m->mothurRemove(overlapFile);
460                                         outputTypes.clear();
461                                         return 0; 
462                                 }
463                                 
464                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
465                                         
466                                         if (seqs[i].seq1 != seqs[i].seq2) {
467                 
468                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
469                                                 
470                                                 if (m->control_pressed) { 
471                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
472                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
473                                                         m->mothurRemove(distFile);
474                                                         m->mothurRemove(overlapFile);
475                                                         outputTypes.clear();
476                                                         return 0; 
477                                                 }
478         
479                                                 float rndDist;
480                                                 if (hard) {
481                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
482                                                 }else{
483                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
484                                                 }
485                                                                                                 
486                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
487                                                         oldList.setLabel("unique");
488                                                         printData(&oldList);
489                                                 }
490                                                 else if((rndDist != rndPreviousDist)){
491                                                         if (merge) {
492                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
493                                                                 
494                                                                 if (m->control_pressed) { 
495                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
496                                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
497                                                                         m->mothurRemove(distFile);
498                                                                         m->mothurRemove(overlapFile);
499                                                                         outputTypes.clear();
500                                                                         return 0; 
501                                                                 }
502
503                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
504                                                                 printData(temp);
505                                                                 delete temp;
506                                                         }else{
507                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
508                                                                 printData(&oldList);
509                                                         }
510                                                 }
511                                                 
512                                                 previousDist = seqs[i].dist;
513                                                 rndPreviousDist = rndDist;
514                                                 oldList = *list;
515                                                 Seq2Bin = cluster->getSeqtoBin();
516                                                 oldSeq2Bin = Seq2Bin;
517                                         }
518                                 }
519                         }
520                         //inHcluster.close();
521                         
522                         if(previousDist <= 0.0000){
523                                 oldList.setLabel("unique");
524                                 printData(&oldList);
525                         }
526                         else if(rndPreviousDist<cutoff){
527                                 if (merge) {
528                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
529                                         
530                                         if (m->control_pressed) { 
531                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
532                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
533                                                         m->mothurRemove(distFile);
534                                                         m->mothurRemove(overlapFile);
535                                                         outputTypes.clear();
536                                                         return 0; 
537                                         }
538                                         
539                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
540                                         printData(temp);
541                                         delete temp;
542                                 }else{
543                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
544                                         printData(&oldList);
545                                 }
546                         }
547                         
548                         delete hcluster;
549                         m->mothurRemove(distFile);
550                         m->mothurRemove(overlapFile);
551                 }
552                 
553                 delete list;
554                 delete rabund;
555                 listFile.close();
556         if (countfile == "") {
557             sabundFile.close();
558             rabundFile.close();
559         }
560                 if (m->control_pressed) { 
561                         delete nameMap; 
562                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
563                         outputTypes.clear();
564                         return 0; 
565                 }
566                 
567                 m->mothurOutEndLine();
568                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
569                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
570                 if (countfile == "") {
571             m->mothurOut(rabundFileName); m->mothurOutEndLine();        outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
572             m->mothurOut(sabundFileName); m->mothurOutEndLine();        outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
573         }
574                 m->mothurOutEndLine();
575                 
576                 if (saveCutoff != cutoff) { 
577                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
578                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
579                         
580                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
581                 }
582                 
583                 //set list file as new current listfile
584                 string current = "";
585                 itTypes = outputTypes.find("list");
586                 if (itTypes != outputTypes.end()) {
587                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
588                 }
589                 
590                 //set rabund file as new current rabundfile
591                 itTypes = outputTypes.find("rabund");
592                 if (itTypes != outputTypes.end()) {
593                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
594                 }
595                 
596                 //set sabund file as new current sabundfile
597                 itTypes = outputTypes.find("sabund");
598                 if (itTypes != outputTypes.end()) {
599                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
600                 }
601                 
602                 
603                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
604                         
605                 return 0;
606         }
607         catch(exception& e) {
608                 m->errorOut(e, "MGClusterCommand", "execute");
609                 exit(1);
610         }
611 }
612 //**********************************************************************************************************************
613 void MGClusterCommand::printData(ListVector* mergedList){
614         try {
615                 mergedList->print(listFile);
616         SAbundVector sabund = mergedList->getSAbundVector();
617         
618         if (countfile == "") {
619             mergedList->getRAbundVector().print(rabundFile);
620             sabund.print(sabundFile);
621         }
622
623                 sabund.print(cout);
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "MGClusterCommand", "printData");
627                 exit(1);
628         }
629 }
630 //**********************************************************************************************************************
631 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
632 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
633 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
634         try {
635                 //create new listvector so you don't overwrite the clustering
636                 ListVector* newList = new ListVector(oldList);
637
638                 bool done = false;
639                 ifstream inOverlap;
640                 int count = 0;
641                 
642                 if (hclusterWanted) {  
643                         m->openInputFile(overlapFile, inOverlap);  
644                         if (inOverlap.eof()) {  done = true;  }
645                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
646                 
647                 while (!done) {
648                         if (m->control_pressed) { 
649                                 if (hclusterWanted) {   inOverlap.close();  }           
650                                 return newList;
651                         }
652                         
653                         //get next overlap
654                         seqDist overlapNode;
655                         if (!hclusterWanted) {  
656                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
657                                         overlapNode = overlapMatrix[count];
658                                         count++;
659                                 }else { break; }
660                         }else { 
661                                 if (!inOverlap.eof()) {
662                                         string firstName, secondName;
663                                         float overlapDistance;
664                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
665                                         
666                                         //commented out because we check this in readblast already
667                                         //map<string,int>::iterator itA = nameMap->find(firstName);
668                                         //map<string,int>::iterator itB = nameMap->find(secondName);
669                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
670                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
671                                         
672                                         //overlapNode.seq1 = itA->second;
673                                         //overlapNode.seq2 = itB->second;
674                                         overlapNode.seq1 = nameMap->get(firstName);
675                                         overlapNode.seq2 = nameMap->get(secondName);
676                                         overlapNode.dist = overlapDistance;
677                                 }else { inOverlap.close(); break; }
678                         } 
679                 
680                         if (overlapNode.dist < dist) {
681                                 //get names of seqs that overlap
682                                 string name1 = nameMap->get(overlapNode.seq1);
683                                 string name2 = nameMap->get(overlapNode.seq2);
684                         
685                                 //use binInfo to find out if they are already in the same bin
686                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
687                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
688                                 
689                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
690                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
691
692                                 //int binKeep = itBin1->second;
693                                 //int binRemove = itBin2->second;
694                                 
695                                 int binKeep = binInfo[name1];
696                                 int binRemove = binInfo[name2];
697                         
698                                 //if not merge bins and update binInfo
699                                 if(binKeep != binRemove) {
700                 
701                                         //save names in old bin
702                                         string names = newList->get(binRemove);
703                 
704                                         //merge bins into name1s bin
705                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
706                                         newList->set(binRemove, "");    
707                                         
708                                         //update binInfo
709                                         while (names.find_first_of(',') != -1) { 
710                                                 //get name from bin
711                                                 string name = names.substr(0,names.find_first_of(','));
712                                                 //save name and bin number
713                                                 binInfo[name] = binKeep;
714                                                 names = names.substr(names.find_first_of(',')+1, names.length());
715                                         }
716                                         
717                                         //get last name
718                                         binInfo[names] = binKeep;
719                                 }
720                                 
721                         }else { done = true; }
722                 }
723                 
724                 //return listvector
725                 return newList;
726                                 
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
730                 exit(1);
731         }
732 }
733 //**********************************************************************************************************************
734 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
735         try {
736                 //sort distFile
737                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
738                 m->mothurRemove(unsortedDist);  //delete unsorted file
739                 distFile = sortedDistFile;
740                 
741                 //sort overlap file
742                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
743                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
744                 overlapFile = sortedOverlapFile;
745         }
746         catch(exception& e) {
747                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
748                 exit(1);
749         }
750 }
751
752 //**********************************************************************************************************************
753
754 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
755     try {
756         //vector<string> names = ct.getNamesOfSeqs();
757
758         //for ( int i; i < ct.getNumGroups(); i++ ) {    rav.push_back( ct.getNumSeqs(names[i]) );    }
759         //return rav;
760         
761         for(int i = 0; i < list->getNumBins(); i++) { 
762            vector<string> binNames;
763            string bin = list->get(i);
764            m->splitAtComma(bin, binNames);
765            int total = 0;
766            for (int j = 0; j < binNames.size(); j++) { 
767                total += ct->getNumSeqs(binNames[j]);
768            }
769            rabund->push_back(total);   
770        }
771         
772         
773     }
774     catch(exception& e) {
775                 m->errorOut(e, "MGClusterCommand", "createRabund");
776                 exit(1);
777         }
778     
779 }
780
781 //**********************************************************************************************************************
782
783