]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
changes while testing
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
17                 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
18                 CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
19                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
22                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
23                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
24                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
25                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
26                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string MGClusterCommand::getHelpString(){       
41         try {
42                 string helpString = "";
43                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
44                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
45                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
46                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
47                 helpString += "The precision parameter's default value is 100. \n";
48                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
49                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
50                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
51                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
52                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
53                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
54                 helpString += "The mgcluster command should be in the following format: \n";
55                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "MGClusterCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string MGClusterCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } 
70         else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; } 
71         else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
72         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
73         
74         return pattern;
75     }
76     catch(exception& e) {
77         m->errorOut(e, "MGClusterCommand", "getOutputPattern");
78         exit(1);
79     }
80 }
81 //*******************************************************************************************************************
82 MGClusterCommand::MGClusterCommand(){   
83         try {
84                 abort = true; calledHelp = true; 
85                 setParameters();
86                 vector<string> tempOutNames;
87                 outputTypes["list"] = tempOutNames;
88                 outputTypes["rabund"] = tempOutNames;
89                 outputTypes["sabund"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97 MGClusterCommand::MGClusterCommand(string option) {
98         try {
99                 abort = false; calledHelp = false;   
100                 
101                 //allow user to run help
102                 if(option == "help") { help(); abort = true; calledHelp = true; }
103                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
104                 
105                 else {
106                         vector<string> myArray = setParameters();
107                         
108                         OptionParser parser(option);
109                         map<string, string> parameters = parser.getParameters();
110                         
111                         ValidParameters validParameter;
112                         map<string,string>::iterator it;
113                 
114                         //check to make sure all parameters are valid for command
115                         for (it = parameters.begin(); it != parameters.end(); it++) { 
116                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
117                         }
118                         
119                         //initialize outputTypes
120                         vector<string> tempOutNames;
121                         outputTypes["list"] = tempOutNames;
122                         outputTypes["rabund"] = tempOutNames;
123                         outputTypes["sabund"] = tempOutNames;
124                         
125                         //if the user changes the input directory command factory will send this info to us in the output parameter 
126                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
127                         if (inputDir == "not found"){   inputDir = "";          }
128                         else {
129                                 string path;
130                                 it = parameters.find("blast");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
136                                 }
137                                 
138                                 it = parameters.find("name");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
144                                 }
145                 
146                 it = parameters.find("count");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
152                                 }
153                         }
154
155                         
156                         //check for required parameters
157                         blastfile = validParameter.validFile(parameters, "blast", true);
158                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
159                         else if (blastfile == "not found") { blastfile = ""; }
160                         
161                         //if the user changes the output directory command factory will send this info to us in the output parameter 
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
163                                 outputDir = ""; 
164                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
165                         }
166                         
167                         namefile = validParameter.validFile(parameters, "name", true);
168                         if (namefile == "not open") { abort = true; }   
169                         else if (namefile == "not found") { namefile = ""; }
170                         else { m->setNameFile(namefile); }
171             
172             countfile = validParameter.validFile(parameters, "count", true);
173                         if (countfile == "not open") { abort = true; }  
174                         else if (countfile == "not found") { countfile = ""; }
175             else { m->setCountTableFile(countfile); }
176             
177             if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
178                         
179                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
180                         
181                         //check for optional parameter and set defaults
182                         string temp;
183             temp = validParameter.validFile(parameters, "precision", false);            if (temp == "not found") { temp = "100"; }
184                         precisionLength = temp.length();
185                         m->mothurConvert(temp, precision); 
186                         
187                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
188                         m->mothurConvert(temp, cutoff); 
189                         cutoff += (5 / (precision * 10.0));
190                         
191                         method = validParameter.validFile(parameters, "method", false);
192                         if (method == "not found") { method = "average"; }
193                         
194                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
195                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
196
197                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
198                         m->mothurConvert(temp, length); 
199                         
200                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
201                         m->mothurConvert(temp, penalty); 
202                         
203                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
204                         minWanted = m->isTrue(temp); 
205                         
206                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
207                         merge = m->isTrue(temp); 
208                         
209                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
210                         hclusterWanted = m->isTrue(temp); 
211                         
212                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
213                         hard = m->isTrue(temp);            
214                 }
215
216         }
217         catch(exception& e) {
218                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
219                 exit(1);
220         }
221 }
222 //**********************************************************************************************************************
223 int MGClusterCommand::execute(){
224         try {
225                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
226                 
227                 //read names file
228                 if (namefile != "") {
229                         nameMap = new NameAssignment(namefile);
230                         nameMap->readMap();
231                 }else{ nameMap= new NameAssignment(); }
232                 
233                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
234                 string tag = "";
235                 time_t start;
236                 float previousDist = 0.00000;
237                 float rndPreviousDist = 0.00000; 
238         
239                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
240                 //must remember to delete those objects here since readBlast does not
241                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
242                 read->read(nameMap);
243         
244         list = new ListVector(nameMap->getListVector());
245         RAbundVector* rabund = NULL;
246         
247         if(countfile != "") {
248             //map<string, int> nameMapCounts = m->readNames(namefile);
249             ct = new CountTable();
250             ct->readTable(countfile, false);
251             rabund = new RAbundVector();
252             createRabund(ct, list, rabund);
253         }else {
254             rabund = new RAbundVector(list->getRAbundVector());
255         }
256         
257                 
258                 //list = new ListVector(nameMap->getListVector());
259                 //rabund = new RAbundVector(list->getRAbundVector());
260                 
261                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
262                 
263                 start = time(NULL);
264                 oldList = *list;
265                 map<string, int> Seq2Bin;
266                 map<string, int> oldSeq2Bin;
267                 
268                 if (method == "furthest")               { tag = "fn";  }
269                 else if (method == "nearest")   { tag = "nn";  }
270                 else                                                    { tag = "an";  }
271                 
272         map<string, string> variables; 
273         variables["[filename]"] = fileroot;
274         variables["[clustertag]"] = tag;
275         string sabundFileName = getOutputFileName("sabund", variables);
276         string rabundFileName = getOutputFileName("rabund", variables);
277         if (countfile != "") { variables["[tag2]"] = "unique_list"; }
278         string listFileName = getOutputFileName("list", variables);
279         
280         if (countfile == "") {
281             m->openOutputFile(sabundFileName,   sabundFile);
282             m->openOutputFile(rabundFileName,   rabundFile);
283         }
284                 m->openOutputFile(listFileName, listFile);
285                 
286                 if (m->control_pressed) { 
287                         delete nameMap; delete read; delete list; delete rabund; 
288                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
289                         outputTypes.clear();
290                         return 0; 
291                 }
292                 
293                 double saveCutoff = cutoff;
294                 
295                 if (!hclusterWanted) {
296                         //get distmatrix and overlap
297                         SparseDistanceMatrix* distMatrix = read->getDistMatrix();
298                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
299                         delete read;
300                 
301                         //create cluster
302                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
303                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
304                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
305                         cluster->setMapWanted(true);
306                         Seq2Bin = cluster->getSeqtoBin();
307                         oldSeq2Bin = Seq2Bin;
308                         
309                         if (m->control_pressed) { 
310                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
311                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
312                                 outputTypes.clear();
313                                 return 0; 
314                         }
315             
316             
317                         //cluster using cluster classes
318                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
319                                 
320                 if (m->debug) {  cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
321                 
322                                 cluster->update(cutoff);
323                                 
324                                 if (m->control_pressed) { 
325                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
326                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
327                                         outputTypes.clear();
328                                         return 0; 
329                                 }
330                                 
331                                 float dist = distMatrix->getSmallDist();
332                                 float rndDist;
333                                 if (hard) {
334                                         rndDist = m->ceilDist(dist, precision); 
335                                 }else{
336                                         rndDist = m->roundDist(dist, precision); 
337                                 }
338                                 
339                                 if(previousDist <= 0.0000 && dist != previousDist){
340                                         oldList.setLabel("unique");
341                                         printData(&oldList);
342                                 }
343                                 else if(rndDist != rndPreviousDist){
344                                         if (merge) {
345                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
346                                                 
347                                                 if (m->control_pressed) { 
348                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
349                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
350                                                         outputTypes.clear();
351                                                         return 0; 
352                                                 }
353                                                 
354                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
355                                                 printData(temp);
356                                                 delete temp;
357                                         }else{
358                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
359                                                 printData(&oldList);
360                                         }
361                                 }
362         
363                                 previousDist = dist;
364                                 rndPreviousDist = rndDist;
365                                 oldList = *list;
366                                 Seq2Bin = cluster->getSeqtoBin();
367                                 oldSeq2Bin = Seq2Bin;
368                         }
369                         
370                         if(previousDist <= 0.0000){
371                                 oldList.setLabel("unique");
372                                 printData(&oldList);
373                         }
374                         else if(rndPreviousDist<cutoff){
375                                 if (merge) {
376                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
377                                         
378                                         if (m->control_pressed) { 
379                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
380                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
381                                                         outputTypes.clear();
382                                                         return 0; 
383                                         }
384                                         
385                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
386                                         printData(temp);
387                                         delete temp;
388                                 }else{
389                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
390                                         printData(&oldList);
391                                 }
392                         }
393                         
394                         //free memory
395                         overlapMatrix.clear();
396                         delete distMatrix;
397                         delete cluster;
398                         
399                 }else { //use hcluster to cluster
400                         //get distmatrix and overlap
401                         overlapFile = read->getOverlapFile();
402                         distFile = read->getDistFile(); 
403                         delete read;
404                 
405                         //sort the distance and overlap files
406                         sortHclusterFiles(distFile, overlapFile);
407                         
408                         if (m->control_pressed) { 
409                                 delete nameMap;  delete list; delete rabund; 
410                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
411                                 outputTypes.clear();
412                                 return 0; 
413                         }
414                 
415                         //create cluster
416                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
417                         hcluster->setMapWanted(true);
418                         Seq2Bin = cluster->getSeqtoBin();
419                         oldSeq2Bin = Seq2Bin;
420                         
421                         vector<seqDist> seqs; seqs.resize(1); // to start loop
422                         //ifstream inHcluster;
423                         //m->openInputFile(distFile, inHcluster);
424                         
425                         if (m->control_pressed) { 
426                                 delete nameMap;  delete list; delete rabund; delete hcluster;
427                                 listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
428                                 outputTypes.clear();
429                                 return 0; 
430                         }
431
432                         while (seqs.size() != 0){
433                 
434                                 seqs = hcluster->getSeqs();
435                                 
436                                 //to account for cutoff change in average neighbor
437                                 if (seqs.size() != 0) {
438                                         if (seqs[0].dist > cutoff) { break; }
439                                 }
440                                 
441                                 if (m->control_pressed) { 
442                                         delete nameMap;  delete list; delete rabund; delete hcluster;
443                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
444                                         m->mothurRemove(distFile);
445                                         m->mothurRemove(overlapFile);
446                                         outputTypes.clear();
447                                         return 0; 
448                                 }
449                                 
450                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
451                                         
452                                         if (seqs[i].seq1 != seqs[i].seq2) {
453                 
454                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
455                                                 
456                                                 if (m->control_pressed) { 
457                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
458                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
459                                                         m->mothurRemove(distFile);
460                                                         m->mothurRemove(overlapFile);
461                                                         outputTypes.clear();
462                                                         return 0; 
463                                                 }
464         
465                                                 float rndDist;
466                                                 if (hard) {
467                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
468                                                 }else{
469                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
470                                                 }
471                                                                                                 
472                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
473                                                         oldList.setLabel("unique");
474                                                         printData(&oldList);
475                                                 }
476                                                 else if((rndDist != rndPreviousDist)){
477                                                         if (merge) {
478                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
479                                                                 
480                                                                 if (m->control_pressed) { 
481                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
482                                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
483                                                                         m->mothurRemove(distFile);
484                                                                         m->mothurRemove(overlapFile);
485                                                                         outputTypes.clear();
486                                                                         return 0; 
487                                                                 }
488
489                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
490                                                                 printData(temp);
491                                                                 delete temp;
492                                                         }else{
493                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
494                                                                 printData(&oldList);
495                                                         }
496                                                 }
497                                                 
498                                                 previousDist = seqs[i].dist;
499                                                 rndPreviousDist = rndDist;
500                                                 oldList = *list;
501                                                 Seq2Bin = cluster->getSeqtoBin();
502                                                 oldSeq2Bin = Seq2Bin;
503                                         }
504                                 }
505                         }
506                         //inHcluster.close();
507                         
508                         if(previousDist <= 0.0000){
509                                 oldList.setLabel("unique");
510                                 printData(&oldList);
511                         }
512                         else if(rndPreviousDist<cutoff){
513                                 if (merge) {
514                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
515                                         
516                                         if (m->control_pressed) { 
517                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
518                                                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
519                                                         m->mothurRemove(distFile);
520                                                         m->mothurRemove(overlapFile);
521                                                         outputTypes.clear();
522                                                         return 0; 
523                                         }
524                                         
525                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
526                                         printData(temp);
527                                         delete temp;
528                                 }else{
529                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
530                                         printData(&oldList);
531                                 }
532                         }
533                         
534                         delete hcluster;
535                         m->mothurRemove(distFile);
536                         m->mothurRemove(overlapFile);
537                 }
538                 
539                 delete list;
540                 delete rabund;
541                 listFile.close();
542         if (countfile == "") {
543             sabundFile.close();
544             rabundFile.close();
545         }
546                 if (m->control_pressed) { 
547                         delete nameMap; 
548                         listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
549                         outputTypes.clear();
550                         return 0; 
551                 }
552                 
553                 m->mothurOutEndLine();
554                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
555                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
556                 if (countfile == "") {
557             m->mothurOut(rabundFileName); m->mothurOutEndLine();        outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
558             m->mothurOut(sabundFileName); m->mothurOutEndLine();        outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
559         }
560                 m->mothurOutEndLine();
561                 
562                 if (saveCutoff != cutoff) { 
563                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
564                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
565                         
566                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
567                 }
568                 
569                 //set list file as new current listfile
570                 string current = "";
571                 itTypes = outputTypes.find("list");
572                 if (itTypes != outputTypes.end()) {
573                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
574                 }
575                 
576                 //set rabund file as new current rabundfile
577                 itTypes = outputTypes.find("rabund");
578                 if (itTypes != outputTypes.end()) {
579                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
580                 }
581                 
582                 //set sabund file as new current sabundfile
583                 itTypes = outputTypes.find("sabund");
584                 if (itTypes != outputTypes.end()) {
585                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
586                 }
587                 
588                 
589                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
590                         
591                 return 0;
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "MGClusterCommand", "execute");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599 void MGClusterCommand::printData(ListVector* mergedList){
600         try {
601                 mergedList->print(listFile);
602         SAbundVector sabund = mergedList->getSAbundVector();
603         
604         if (countfile == "") {
605             mergedList->getRAbundVector().print(rabundFile);
606             sabund.print(sabundFile);
607         }
608
609                 sabund.print(cout);
610         }
611         catch(exception& e) {
612                 m->errorOut(e, "MGClusterCommand", "printData");
613                 exit(1);
614         }
615 }
616 //**********************************************************************************************************************
617 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
618 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
619 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
620         try {
621                 //create new listvector so you don't overwrite the clustering
622                 ListVector* newList = new ListVector(oldList);
623
624                 bool done = false;
625                 ifstream inOverlap;
626                 int count = 0;
627                 
628                 if (hclusterWanted) {  
629                         m->openInputFile(overlapFile, inOverlap);  
630                         if (inOverlap.eof()) {  done = true;  }
631                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
632                 
633                 while (!done) {
634                         if (m->control_pressed) { 
635                                 if (hclusterWanted) {   inOverlap.close();  }           
636                                 return newList;
637                         }
638                         
639                         //get next overlap
640                         seqDist overlapNode;
641                         if (!hclusterWanted) {  
642                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
643                                         overlapNode = overlapMatrix[count];
644                                         count++;
645                                 }else { break; }
646                         }else { 
647                                 if (!inOverlap.eof()) {
648                                         string firstName, secondName;
649                                         float overlapDistance;
650                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
651                                         
652                                         //commented out because we check this in readblast already
653                                         //map<string,int>::iterator itA = nameMap->find(firstName);
654                                         //map<string,int>::iterator itB = nameMap->find(secondName);
655                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
656                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
657                                         
658                                         //overlapNode.seq1 = itA->second;
659                                         //overlapNode.seq2 = itB->second;
660                                         overlapNode.seq1 = nameMap->get(firstName);
661                                         overlapNode.seq2 = nameMap->get(secondName);
662                                         overlapNode.dist = overlapDistance;
663                                 }else { inOverlap.close(); break; }
664                         } 
665                 
666                         if (overlapNode.dist < dist) {
667                                 //get names of seqs that overlap
668                                 string name1 = nameMap->get(overlapNode.seq1);
669                                 string name2 = nameMap->get(overlapNode.seq2);
670                         
671                                 //use binInfo to find out if they are already in the same bin
672                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
673                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
674                                 
675                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
676                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
677
678                                 //int binKeep = itBin1->second;
679                                 //int binRemove = itBin2->second;
680                                 
681                                 int binKeep = binInfo[name1];
682                                 int binRemove = binInfo[name2];
683                         
684                                 //if not merge bins and update binInfo
685                                 if(binKeep != binRemove) {
686                 
687                                         //save names in old bin
688                                         string names = newList->get(binRemove);
689                 
690                                         //merge bins into name1s bin
691                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
692                                         newList->set(binRemove, "");    
693                                         
694                                         //update binInfo
695                                         while (names.find_first_of(',') != -1) { 
696                                                 //get name from bin
697                                                 string name = names.substr(0,names.find_first_of(','));
698                                                 //save name and bin number
699                                                 binInfo[name] = binKeep;
700                                                 names = names.substr(names.find_first_of(',')+1, names.length());
701                                         }
702                                         
703                                         //get last name
704                                         binInfo[names] = binKeep;
705                                 }
706                                 
707                         }else { done = true; }
708                 }
709                 
710                 //return listvector
711                 return newList;
712                                 
713         }
714         catch(exception& e) {
715                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
716                 exit(1);
717         }
718 }
719 //**********************************************************************************************************************
720 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
721         try {
722                 //sort distFile
723                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
724                 m->mothurRemove(unsortedDist);  //delete unsorted file
725                 distFile = sortedDistFile;
726                 
727                 //sort overlap file
728                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
729                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
730                 overlapFile = sortedOverlapFile;
731         }
732         catch(exception& e) {
733                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
734                 exit(1);
735         }
736 }
737
738 //**********************************************************************************************************************
739
740 void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
741     try {
742         //vector<string> names = ct.getNamesOfSeqs();
743
744         //for ( int i; i < ct.getNumGroups(); i++ ) {    rav.push_back( ct.getNumSeqs(names[i]) );    }
745         //return rav;
746         
747         for(int i = 0; i < list->getNumBins(); i++) { 
748            vector<string> binNames;
749            string bin = list->get(i);
750            m->splitAtComma(bin, binNames);
751            int total = 0;
752            for (int j = 0; j < binNames.size(); j++) { 
753                total += ct->getNumSeqs(binNames[j]);
754            }
755            rabund->push_back(total);   
756        }
757         
758         
759     }
760     catch(exception& e) {
761                 m->errorOut(e, "MGClusterCommand", "createRabund");
762                 exit(1);
763         }
764     
765 }
766
767 //**********************************************************************************************************************
768
769