]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
removed comments
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
26                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){       
41         try {
42                 string helpString = "";
43                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47                 helpString += "The precision parameter's default value is 100. \n";
48                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
49                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
51                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
52                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
53                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54                 helpString += "The mgcluster command should be in the following format: \n";
55                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MGClusterCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
66         try {
67         string outputFileName = "";
68                 map<string, vector<string> >::iterator it;
69         
70         //is this a type this command creates
71         it = outputTypes.find(type);
72         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73         else {
74             if (type == "list") {  outputFileName =  "list"; }
75             else if (type == "rabund") {  outputFileName =  "rabund"; }
76             else if (type == "sabund") {  outputFileName =  "sabund"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 MGClusterCommand::MGClusterCommand(){   
88         try {
89                 abort = true; calledHelp = true; 
90                 setParameters();
91                 vector<string> tempOutNames;
92                 outputTypes["list"] = tempOutNames;
93                 outputTypes["rabund"] = tempOutNames;
94                 outputTypes["sabund"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102 MGClusterCommand::MGClusterCommand(string option) {
103         try {
104                 abort = false; calledHelp = false;   
105                 
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 
110                 else {
111                         vector<string> myArray = setParameters();
112                         
113                         OptionParser parser(option);
114                         map<string, string> parameters = parser.getParameters();
115                         
116                         ValidParameters validParameter;
117                         map<string,string>::iterator it;
118                 
119                         //check to make sure all parameters are valid for command
120                         for (it = parameters.begin(); it != parameters.end(); it++) { 
121                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
122                         }
123                         
124                         //initialize outputTypes
125                         vector<string> tempOutNames;
126                         outputTypes["list"] = tempOutNames;
127                         outputTypes["rabund"] = tempOutNames;
128                         outputTypes["sabund"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("blast");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
141                                 }
142                                 
143                                 it = parameters.find("name");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
149                                 }
150                         }
151
152                         
153                         //check for required parameters
154                         blastfile = validParameter.validFile(parameters, "blast", true);
155                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
156                         else if (blastfile == "not found") { blastfile = ""; }
157                         
158                         //if the user changes the output directory command factory will send this info to us in the output parameter 
159                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
160                                 outputDir = ""; 
161                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
162                         }
163                         
164                         namefile = validParameter.validFile(parameters, "name", true);
165                         if (namefile == "not open") { abort = true; }   
166                         else if (namefile == "not found") { namefile = ""; }
167                         else { m->setNameFile(namefile); }
168                         
169                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
170                         
171                         //check for optional parameter and set defaults
172                         string temp;
173             temp = validParameter.validFile(parameters, "large", false);                        if (temp == "not found") { temp = "false"; }            
174                         large = m->isTrue(temp); 
175             
176                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
177                         precisionLength = temp.length();
178                         m->mothurConvert(temp, precision); 
179                         
180                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
181                         m->mothurConvert(temp, cutoff); 
182                         cutoff += (5 / (precision * 10.0));
183                         
184                         method = validParameter.validFile(parameters, "method", false);
185                         if (method == "not found") { method = "average"; }
186                         
187                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
188                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
189
190                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
191                         m->mothurConvert(temp, length); 
192                         
193                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
194                         m->mothurConvert(temp, penalty); 
195                         
196                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
197                         minWanted = m->isTrue(temp); 
198                         
199                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
200                         merge = m->isTrue(temp); 
201                         
202                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
203                         hclusterWanted = m->isTrue(temp); 
204                         
205                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
206                         hard = m->isTrue(temp);            
207                 }
208
209         }
210         catch(exception& e) {
211                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
212                 exit(1);
213         }
214 }
215 //**********************************************************************************************************************
216 int MGClusterCommand::execute(){
217         try {
218                 cout << "1" << endl;
219                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
220                 
221                 //read names file
222                 if (namefile != "") {
223                         nameMap = new NameAssignment(namefile);
224                         nameMap->readMap();
225                 }else{ nameMap= new NameAssignment(); }
226                 
227                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
228                 string tag = "";
229                 time_t start;
230                 float previousDist = 0.00000;
231                 float rndPreviousDist = 0.00000; 
232         
233                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
234                 //must remember to delete those objects here since readBlast does not
235                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
236                 read->read(nameMap);
237         
238         list = new ListVector(nameMap->getListVector());
239         RAbundVector* rabund = NULL;
240         
241         if(large) {
242             map<string, int> nameMapCounts = m->readNames(namefile);
243             createRabund(nameMapCounts);
244             rabund = &rav;
245         }else {
246             rabund = new RAbundVector(list->getRAbundVector());
247         }
248         
249                 
250                 list = new ListVector(nameMap->getListVector());
251                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
252                 
253                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
254                 
255                 start = time(NULL);
256                 oldList = *list;
257                 map<string, int> Seq2Bin;
258                 map<string, int> oldSeq2Bin;
259                 
260                 if (method == "furthest")               { tag = "fn";  }
261                 else if (method == "nearest")   { tag = "nn";  }
262                 else                                                    { tag = "an";  }
263                 
264         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
265         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
266         string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
267         
268                 m->openOutputFile(sabundFileName,       sabundFile);
269                 m->openOutputFile(rabundFileName,       rabundFile);
270                 m->openOutputFile(listFileName, listFile);
271                 
272                 if (m->control_pressed) { 
273                         delete nameMap; delete read; delete list; delete rabund; 
274                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
275                         outputTypes.clear();
276                         return 0; 
277                 }
278                 
279                 double saveCutoff = cutoff;
280                 
281                 if (!hclusterWanted) {
282                         //get distmatrix and overlap
283                         SparseMatrix* distMatrix = read->getDistMatrix();
284                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
285                         delete read;
286                 
287                         //create cluster
288                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
289                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
290                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
291                         cluster->setMapWanted(true);
292                         Seq2Bin = cluster->getSeqtoBin();
293                         oldSeq2Bin = Seq2Bin;
294                         
295                         if (m->control_pressed) { 
296                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
297                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
298                                 outputTypes.clear();
299                                 return 0; 
300                         }
301                 
302                         //cluster using cluster classes
303                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
304                                 
305                                 cluster->update(cutoff);
306                                 
307                                 if (m->control_pressed) { 
308                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
309                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
310                                         outputTypes.clear();
311                                         return 0; 
312                                 }
313                                 
314                                 float dist = distMatrix->getSmallDist();
315                                 float rndDist;
316                                 if (hard) {
317                                         rndDist = m->ceilDist(dist, precision); 
318                                 }else{
319                                         rndDist = m->roundDist(dist, precision); 
320                                 }
321                                 
322                                 if(previousDist <= 0.0000 && dist != previousDist){
323                                         oldList.setLabel("unique");
324                                         printData(&oldList);
325                                 }
326                                 else if(rndDist != rndPreviousDist){
327                                         if (merge) {
328                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
329                                                 
330                                                 if (m->control_pressed) { 
331                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
332                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
333                                                         outputTypes.clear();
334                                                         return 0; 
335                                                 }
336                                                 
337                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
338                                                 printData(temp);
339                                                 delete temp;
340                                         }else{
341                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
342                                                 printData(&oldList);
343                                         }
344                                 }
345         
346                                 previousDist = dist;
347                                 rndPreviousDist = rndDist;
348                                 oldList = *list;
349                                 Seq2Bin = cluster->getSeqtoBin();
350                                 oldSeq2Bin = Seq2Bin;
351                         }
352                         
353                         if(previousDist <= 0.0000){
354                                 oldList.setLabel("unique");
355                                 printData(&oldList);
356                         }
357                         else if(rndPreviousDist<cutoff){
358                                 if (merge) {
359                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
360                                         
361                                         if (m->control_pressed) { 
362                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
363                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
364                                                         outputTypes.clear();
365                                                         return 0; 
366                                         }
367                                         
368                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
369                                         printData(temp);
370                                         delete temp;
371                                 }else{
372                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
373                                         printData(&oldList);
374                                 }
375                         }
376                         
377                         //free memory
378                         overlapMatrix.clear();
379                         delete distMatrix;
380                         delete cluster;
381                         
382                 }else { //use hcluster to cluster
383                         //get distmatrix and overlap
384                         overlapFile = read->getOverlapFile();
385                         distFile = read->getDistFile(); 
386                         delete read;
387                 
388                         //sort the distance and overlap files
389                         sortHclusterFiles(distFile, overlapFile);
390                         
391                         if (m->control_pressed) { 
392                                 delete nameMap;  delete list; delete rabund; 
393                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
394                                 outputTypes.clear();
395                                 return 0; 
396                         }
397                 
398                         //create cluster
399                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
400                         hcluster->setMapWanted(true);
401                         Seq2Bin = cluster->getSeqtoBin();
402                         oldSeq2Bin = Seq2Bin;
403                         
404                         vector<seqDist> seqs; seqs.resize(1); // to start loop
405                         //ifstream inHcluster;
406                         //m->openInputFile(distFile, inHcluster);
407                         
408                         if (m->control_pressed) { 
409                                 delete nameMap;  delete list; delete rabund; delete hcluster;
410                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
411                                 outputTypes.clear();
412                                 return 0; 
413                         }
414
415                         while (seqs.size() != 0){
416                 
417                                 seqs = hcluster->getSeqs();
418                                 
419                                 //to account for cutoff change in average neighbor
420                                 if (seqs.size() != 0) {
421                                         if (seqs[0].dist > cutoff) { break; }
422                                 }
423                                 
424                                 if (m->control_pressed) { 
425                                         delete nameMap;  delete list; delete rabund; delete hcluster;
426                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
427                                         m->mothurRemove(distFile);
428                                         m->mothurRemove(overlapFile);
429                                         outputTypes.clear();
430                                         return 0; 
431                                 }
432                                 
433                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
434                                         
435                                         if (seqs[i].seq1 != seqs[i].seq2) {
436                 
437                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
438                                                 
439                                                 if (m->control_pressed) { 
440                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
441                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
442                                                         m->mothurRemove(distFile);
443                                                         m->mothurRemove(overlapFile);
444                                                         outputTypes.clear();
445                                                         return 0; 
446                                                 }
447         
448                                                 float rndDist;
449                                                 if (hard) {
450                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
451                                                 }else{
452                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
453                                                 }
454                                                                                                 
455                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
456                                                         oldList.setLabel("unique");
457                                                         printData(&oldList);
458                                                 }
459                                                 else if((rndDist != rndPreviousDist)){
460                                                         if (merge) {
461                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
462                                                                 
463                                                                 if (m->control_pressed) { 
464                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
465                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
466                                                                         m->mothurRemove(distFile);
467                                                                         m->mothurRemove(overlapFile);
468                                                                         outputTypes.clear();
469                                                                         return 0; 
470                                                                 }
471
472                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
473                                                                 printData(temp);
474                                                                 delete temp;
475                                                         }else{
476                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
477                                                                 printData(&oldList);
478                                                         }
479                                                 }
480                                                 
481                                                 previousDist = seqs[i].dist;
482                                                 rndPreviousDist = rndDist;
483                                                 oldList = *list;
484                                                 Seq2Bin = cluster->getSeqtoBin();
485                                                 oldSeq2Bin = Seq2Bin;
486                                         }
487                                 }
488                         }
489                         //inHcluster.close();
490                         
491                         if(previousDist <= 0.0000){
492                                 oldList.setLabel("unique");
493                                 printData(&oldList);
494                         }
495                         else if(rndPreviousDist<cutoff){
496                                 if (merge) {
497                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
498                                         
499                                         if (m->control_pressed) { 
500                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
501                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
502                                                         m->mothurRemove(distFile);
503                                                         m->mothurRemove(overlapFile);
504                                                         outputTypes.clear();
505                                                         return 0; 
506                                         }
507                                         
508                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
509                                         printData(temp);
510                                         delete temp;
511                                 }else{
512                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
513                                         printData(&oldList);
514                                 }
515                         }
516                         
517                         delete hcluster;
518                         m->mothurRemove(distFile);
519                         m->mothurRemove(overlapFile);
520                 }
521                 
522                 delete list; 
523                 delete rabund;
524                 listFile.close();
525                 sabundFile.close();
526                 rabundFile.close();
527         
528                 if (m->control_pressed) { 
529                         delete nameMap; 
530                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
531                         outputTypes.clear();
532                         return 0; 
533                 }
534                 
535                 m->mothurOutEndLine();
536                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
537                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
538                 m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
539                 m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
540                 m->mothurOutEndLine();
541                 
542                 if (saveCutoff != cutoff) { 
543                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
544                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
545                         
546                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
547                 }
548                 
549                 //set list file as new current listfile
550                 string current = "";
551                 itTypes = outputTypes.find("list");
552                 if (itTypes != outputTypes.end()) {
553                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
554                 }
555                 
556                 //set rabund file as new current rabundfile
557                 itTypes = outputTypes.find("rabund");
558                 if (itTypes != outputTypes.end()) {
559                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
560                 }
561                 
562                 //set sabund file as new current sabundfile
563                 itTypes = outputTypes.find("sabund");
564                 if (itTypes != outputTypes.end()) {
565                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
566                 }
567                 
568                 
569                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
570                         
571                 return 0;
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "MGClusterCommand", "execute");
575                 exit(1);
576         }
577 }
578 //**********************************************************************************************************************
579 void MGClusterCommand::printData(ListVector* mergedList){
580         try {
581                 mergedList->print(listFile);
582                 mergedList->getRAbundVector().print(rabundFile);
583                 
584                 SAbundVector sabund = mergedList->getSAbundVector();
585
586                 sabund.print(cout);
587                 sabund.print(sabundFile);
588         }
589         catch(exception& e) {
590                 m->errorOut(e, "MGClusterCommand", "printData");
591                 exit(1);
592         }
593 }
594 //**********************************************************************************************************************
595 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
596 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
597 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
598         try {
599                 //create new listvector so you don't overwrite the clustering
600                 ListVector* newList = new ListVector(oldList);
601
602                 bool done = false;
603                 ifstream inOverlap;
604                 int count = 0;
605                 
606                 if (hclusterWanted) {  
607                         m->openInputFile(overlapFile, inOverlap);  
608                         if (inOverlap.eof()) {  done = true;  }
609                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
610                 
611                 while (!done) {
612                         if (m->control_pressed) { 
613                                 if (hclusterWanted) {   inOverlap.close();  }           
614                                 return newList;
615                         }
616                         
617                         //get next overlap
618                         seqDist overlapNode;
619                         if (!hclusterWanted) {  
620                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
621                                         overlapNode = overlapMatrix[count];
622                                         count++;
623                                 }else { break; }
624                         }else { 
625                                 if (!inOverlap.eof()) {
626                                         string firstName, secondName;
627                                         float overlapDistance;
628                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
629                                         
630                                         //commented out because we check this in readblast already
631                                         //map<string,int>::iterator itA = nameMap->find(firstName);
632                                         //map<string,int>::iterator itB = nameMap->find(secondName);
633                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
634                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
635                                         
636                                         //overlapNode.seq1 = itA->second;
637                                         //overlapNode.seq2 = itB->second;
638                                         overlapNode.seq1 = nameMap->get(firstName);
639                                         overlapNode.seq2 = nameMap->get(secondName);
640                                         overlapNode.dist = overlapDistance;
641                                 }else { inOverlap.close(); break; }
642                         } 
643                 
644                         if (overlapNode.dist < dist) {
645                                 //get names of seqs that overlap
646                                 string name1 = nameMap->get(overlapNode.seq1);
647                                 string name2 = nameMap->get(overlapNode.seq2);
648                         
649                                 //use binInfo to find out if they are already in the same bin
650                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
651                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
652                                 
653                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
654                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
655
656                                 //int binKeep = itBin1->second;
657                                 //int binRemove = itBin2->second;
658                                 
659                                 int binKeep = binInfo[name1];
660                                 int binRemove = binInfo[name2];
661                         
662                                 //if not merge bins and update binInfo
663                                 if(binKeep != binRemove) {
664                 
665                                         //save names in old bin
666                                         string names = newList->get(binRemove);
667                 
668                                         //merge bins into name1s bin
669                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
670                                         newList->set(binRemove, "");    
671                                         
672                                         //update binInfo
673                                         while (names.find_first_of(',') != -1) { 
674                                                 //get name from bin
675                                                 string name = names.substr(0,names.find_first_of(','));
676                                                 //save name and bin number
677                                                 binInfo[name] = binKeep;
678                                                 names = names.substr(names.find_first_of(',')+1, names.length());
679                                         }
680                                         
681                                         //get last name
682                                         binInfo[names] = binKeep;
683                                 }
684                                 
685                         }else { done = true; }
686                 }
687                 
688                 //return listvector
689                 return newList;
690                                 
691         }
692         catch(exception& e) {
693                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
694                 exit(1);
695         }
696 }
697 //**********************************************************************************************************************
698 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
699         try {
700                 //sort distFile
701                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
702                 m->mothurRemove(unsortedDist);  //delete unsorted file
703                 distFile = sortedDistFile;
704                 
705                 //sort overlap file
706                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
707                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
708                 overlapFile = sortedOverlapFile;
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
712                 exit(1);
713         }
714 }
715
716 //**********************************************************************************************************************
717
718 void MGClusterCommand::createRabund(map<string, int> nameMapCounts){
719     try {
720         //RAbundVector rav;
721         map<string,int>::iterator it;
722         //it = nameMapCounts.begin();
723         //for(int i = 0; i < list->getNumBins(); i++) {   rav.push_back((*it).second); it++;    }
724         for ( it=nameMapCounts.begin(); it!=nameMapCounts.end(); it++ ) {    rav.push_back( it->second );    }
725         //return rav;
726     }
727     catch(exception& e) {
728                 m->errorOut(e, "MGClusterCommand", "createRabund");
729                 exit(1);
730         }
731     
732 }
733
734 //**********************************************************************************************************************
735
736