]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
1.18.0
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MGClusterCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MGClusterCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
44                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
45                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46                 helpString += "The precision parameter's default value is 100. \n";
47                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
48                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
50                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
51                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
52                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
53                 helpString += "The mgcluster command should be in the following format: \n";
54                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MGClusterCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 MGClusterCommand::MGClusterCommand(){   
65         try {
66                 abort = true; calledHelp = true; 
67                 setParameters();
68                 vector<string> tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["rabund"] = tempOutNames;
71                 outputTypes["sabund"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 MGClusterCommand::MGClusterCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 
86                 else {
87                         vector<string> myArray = setParameters();
88                         
89                         OptionParser parser(option);
90                         map<string, string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string,string>::iterator it;
94                 
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["list"] = tempOutNames;
103                         outputTypes["rabund"] = tempOutNames;
104                         outputTypes["sabund"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("blast");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
117                                 }
118                                 
119                                 it = parameters.find("name");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
125                                 }
126                         }
127
128                         
129                         //check for required parameters
130                         blastfile = validParameter.validFile(parameters, "blast", true);
131                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
132                         else if (blastfile == "not found") { blastfile = ""; }
133                         
134                         //if the user changes the output directory command factory will send this info to us in the output parameter 
135                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
136                                 outputDir = ""; 
137                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
138                         }
139                         
140                         namefile = validParameter.validFile(parameters, "name", true);
141                         if (namefile == "not open") { abort = true; }   
142                         else if (namefile == "not found") { namefile = ""; }
143                         
144                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
145                         
146                         //check for optional parameter and set defaults
147                         string temp;
148                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
149                         precisionLength = temp.length();
150                         convert(temp, precision); 
151                         
152                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
153                         convert(temp, cutoff); 
154                         cutoff += (5 / (precision * 10.0));
155                         
156                         method = validParameter.validFile(parameters, "method", false);
157                         if (method == "not found") { method = "average"; }
158                         
159                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
160                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
161
162                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
163                         convert(temp, length); 
164                         
165                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
166                         convert(temp, penalty); 
167                         
168                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
169                         minWanted = m->isTrue(temp); 
170                         
171                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
172                         merge = m->isTrue(temp); 
173                         
174                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
175                         hclusterWanted = m->isTrue(temp); 
176                         
177                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
178                         hard = m->isTrue(temp);
179                 }
180
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
184                 exit(1);
185         }
186 }
187 //**********************************************************************************************************************
188 int MGClusterCommand::execute(){
189         try {
190                 
191                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
192                 
193                 //read names file
194                 if (namefile != "") {
195                         nameMap = new NameAssignment(namefile);
196                         nameMap->readMap();
197                 }else{ nameMap= new NameAssignment(); }
198                 
199                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
200                 string tag = "";
201                 time_t start;
202                 float previousDist = 0.00000;
203                 float rndPreviousDist = 0.00000;
204                 
205                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
206                 //must remember to delete those objects here since readBlast does not
207                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
208                 read->read(nameMap);
209                 
210                 list = new ListVector(nameMap->getListVector());
211                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
212                 
213                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
214                 
215                 start = time(NULL);
216                 oldList = *list;
217                 map<string, int> Seq2Bin;
218                 map<string, int> oldSeq2Bin;
219                 
220                 if (method == "furthest")               { tag = "fn";  }
221                 else if (method == "nearest")   { tag = "nn";  }
222                 else                                                    { tag = "an";  }
223                 
224                 //open output files
225                 m->openOutputFile(fileroot+ tag + ".list",  listFile);
226                 m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
227                 m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
228                 
229                 if (m->control_pressed) { 
230                         delete nameMap; delete read; delete list; delete rabund; 
231                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
232                         outputTypes.clear();
233                         return 0; 
234                 }
235                 
236                 if (!hclusterWanted) {
237                         //get distmatrix and overlap
238                         SparseMatrix* distMatrix = read->getDistMatrix();
239                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
240                         delete read;
241                 
242                         //create cluster
243                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
244                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
245                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
246                         cluster->setMapWanted(true);
247                         Seq2Bin = cluster->getSeqtoBin();
248                         oldSeq2Bin = Seq2Bin;
249                         
250                         if (m->control_pressed) { 
251                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
252                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
253                                 outputTypes.clear();
254                                 return 0; 
255                         }
256                 
257                         //cluster using cluster classes
258                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
259                                 
260                                 cluster->update(cutoff);
261                                 
262                                 if (m->control_pressed) { 
263                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
264                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
265                                         outputTypes.clear();
266                                         return 0; 
267                                 }
268                                 
269                                 float dist = distMatrix->getSmallDist();
270                                 float rndDist;
271                                 if (hard) {
272                                         rndDist = m->ceilDist(dist, precision); 
273                                 }else{
274                                         rndDist = m->roundDist(dist, precision); 
275                                 }
276                                 
277                                 if(previousDist <= 0.0000 && dist != previousDist){
278                                         oldList.setLabel("unique");
279                                         printData(&oldList);
280                                 }
281                                 else if(rndDist != rndPreviousDist){
282                                         if (merge) {
283                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
284                                                 
285                                                 if (m->control_pressed) { 
286                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
287                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
288                                                         outputTypes.clear();
289                                                         return 0; 
290                                                 }
291                                                 
292                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
293                                                 printData(temp);
294                                                 delete temp;
295                                         }else{
296                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
297                                                 printData(&oldList);
298                                         }
299                                 }
300         
301                                 previousDist = dist;
302                                 rndPreviousDist = rndDist;
303                                 oldList = *list;
304                                 Seq2Bin = cluster->getSeqtoBin();
305                                 oldSeq2Bin = Seq2Bin;
306                         }
307                         
308                         if(previousDist <= 0.0000){
309                                 oldList.setLabel("unique");
310                                 printData(&oldList);
311                         }
312                         else if(rndPreviousDist<cutoff){
313                                 if (merge) {
314                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
315                                         
316                                         if (m->control_pressed) { 
317                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
318                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
319                                                         outputTypes.clear();
320                                                         return 0; 
321                                         }
322                                         
323                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
324                                         printData(temp);
325                                         delete temp;
326                                 }else{
327                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
328                                         printData(&oldList);
329                                 }
330                         }
331                         
332                         //free memory
333                         overlapMatrix.clear();
334                         delete distMatrix;
335                         delete cluster;
336                         
337                 }else { //use hcluster to cluster
338                         //get distmatrix and overlap
339                         overlapFile = read->getOverlapFile();
340                         distFile = read->getDistFile(); 
341                         delete read;
342                 
343                         //sort the distance and overlap files
344                         sortHclusterFiles(distFile, overlapFile);
345                         
346                         if (m->control_pressed) { 
347                                 delete nameMap;  delete list; delete rabund; 
348                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
349                                 outputTypes.clear();
350                                 return 0; 
351                         }
352                 
353                         //create cluster
354                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
355                         hcluster->setMapWanted(true);
356                         Seq2Bin = cluster->getSeqtoBin();
357                         oldSeq2Bin = Seq2Bin;
358                         
359                         vector<seqDist> seqs; seqs.resize(1); // to start loop
360                         //ifstream inHcluster;
361                         //m->openInputFile(distFile, inHcluster);
362                         
363                         if (m->control_pressed) { 
364                                 delete nameMap;  delete list; delete rabund; delete hcluster;
365                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
366                                 outputTypes.clear();
367                                 return 0; 
368                         }
369
370                         while (seqs.size() != 0){
371                 
372                                 seqs = hcluster->getSeqs();
373                                 
374                                 if (m->control_pressed) { 
375                                         delete nameMap;  delete list; delete rabund; delete hcluster;
376                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
377                                         remove(distFile.c_str());
378                                         remove(overlapFile.c_str());
379                                         outputTypes.clear();
380                                         return 0; 
381                                 }
382                                 
383                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
384                                         
385                                         if (seqs[i].seq1 != seqs[i].seq2) {
386                 
387                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
388                                                 
389                                                 if (m->control_pressed) { 
390                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
391                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
392                                                         remove(distFile.c_str());
393                                                         remove(overlapFile.c_str());
394                                                         outputTypes.clear();
395                                                         return 0; 
396                                                 }
397         
398                                                 float rndDist;
399                                                 if (hard) {
400                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
401                                                 }else{
402                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
403                                                 }
404                                                                                                 
405                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
406                                                         oldList.setLabel("unique");
407                                                         printData(&oldList);
408                                                 }
409                                                 else if((rndDist != rndPreviousDist)){
410                                                         if (merge) {
411                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
412                                                                 
413                                                                 if (m->control_pressed) { 
414                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
415                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
416                                                                         remove(distFile.c_str());
417                                                                         remove(overlapFile.c_str());
418                                                                         outputTypes.clear();
419                                                                         return 0; 
420                                                                 }
421
422                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
423                                                                 printData(temp);
424                                                                 delete temp;
425                                                         }else{
426                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
427                                                                 printData(&oldList);
428                                                         }
429                                                 }
430                                                 
431                                                 previousDist = seqs[i].dist;
432                                                 rndPreviousDist = rndDist;
433                                                 oldList = *list;
434                                                 Seq2Bin = cluster->getSeqtoBin();
435                                                 oldSeq2Bin = Seq2Bin;
436                                         }
437                                 }
438                         }
439                         //inHcluster.close();
440                         
441                         if(previousDist <= 0.0000){
442                                 oldList.setLabel("unique");
443                                 printData(&oldList);
444                         }
445                         else if(rndPreviousDist<cutoff){
446                                 if (merge) {
447                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
448                                         
449                                         if (m->control_pressed) { 
450                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
451                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
452                                                         remove(distFile.c_str());
453                                                         remove(overlapFile.c_str());
454                                                         outputTypes.clear();
455                                                         return 0; 
456                                         }
457                                         
458                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
459                                         printData(temp);
460                                         delete temp;
461                                 }else{
462                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
463                                         printData(&oldList);
464                                 }
465                         }
466                         
467                         delete hcluster;
468                         remove(distFile.c_str());
469                         remove(overlapFile.c_str());
470                 }
471                 
472                 delete list; 
473                 delete rabund;
474                 listFile.close();
475                 sabundFile.close();
476                 rabundFile.close();
477         
478                 if (m->control_pressed) { 
479                         delete nameMap; 
480                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
481                         outputTypes.clear();
482                         return 0; 
483                 }
484                 
485                 m->mothurOutEndLine();
486                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
487                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
488                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
489                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
490                 m->mothurOutEndLine();
491                 
492                 //set list file as new current listfile
493                 string current = "";
494                 itTypes = outputTypes.find("list");
495                 if (itTypes != outputTypes.end()) {
496                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
497                 }
498                 
499                 //set rabund file as new current rabundfile
500                 itTypes = outputTypes.find("rabund");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
503                 }
504                 
505                 //set sabund file as new current sabundfile
506                 itTypes = outputTypes.find("sabund");
507                 if (itTypes != outputTypes.end()) {
508                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
509                 }
510                 
511                 
512                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
513                         
514                 return 0;
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "MGClusterCommand", "execute");
518                 exit(1);
519         }
520 }
521 //**********************************************************************************************************************
522 void MGClusterCommand::printData(ListVector* mergedList){
523         try {
524                 mergedList->print(listFile);
525                 mergedList->getRAbundVector().print(rabundFile);
526                 
527                 SAbundVector sabund = mergedList->getSAbundVector();
528
529                 sabund.print(cout);
530                 sabund.print(sabundFile);
531         }
532         catch(exception& e) {
533                 m->errorOut(e, "MGClusterCommand", "printData");
534                 exit(1);
535         }
536 }
537 //**********************************************************************************************************************
538 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
539 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
540 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
541         try {
542                 //create new listvector so you don't overwrite the clustering
543                 ListVector* newList = new ListVector(oldList);
544
545                 bool done = false;
546                 ifstream inOverlap;
547                 int count = 0;
548                 
549                 if (hclusterWanted) {  
550                         m->openInputFile(overlapFile, inOverlap);  
551                         if (inOverlap.eof()) {  done = true;  }
552                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
553                 
554                 while (!done) {
555                         if (m->control_pressed) { 
556                                 if (hclusterWanted) {   inOverlap.close();  }           
557                                 return newList;
558                         }
559                         
560                         //get next overlap
561                         seqDist overlapNode;
562                         if (!hclusterWanted) {  
563                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
564                                         overlapNode = overlapMatrix[count];
565                                         count++;
566                                 }else { break; }
567                         }else { 
568                                 if (!inOverlap.eof()) {
569                                         string firstName, secondName;
570                                         float overlapDistance;
571                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
572                                         
573                                         //commented out because we check this in readblast already
574                                         //map<string,int>::iterator itA = nameMap->find(firstName);
575                                         //map<string,int>::iterator itB = nameMap->find(secondName);
576                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
577                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
578                                         
579                                         //overlapNode.seq1 = itA->second;
580                                         //overlapNode.seq2 = itB->second;
581                                         overlapNode.seq1 = nameMap->get(firstName);
582                                         overlapNode.seq2 = nameMap->get(secondName);
583                                         overlapNode.dist = overlapDistance;
584                                 }else { inOverlap.close(); break; }
585                         } 
586                 
587                         if (overlapNode.dist < dist) {
588                                 //get names of seqs that overlap
589                                 string name1 = nameMap->get(overlapNode.seq1);
590                                 string name2 = nameMap->get(overlapNode.seq2);
591                         
592                                 //use binInfo to find out if they are already in the same bin
593                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
594                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
595                                 
596                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
597                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
598
599                                 //int binKeep = itBin1->second;
600                                 //int binRemove = itBin2->second;
601                                 
602                                 int binKeep = binInfo[name1];
603                                 int binRemove = binInfo[name2];
604                         
605                                 //if not merge bins and update binInfo
606                                 if(binKeep != binRemove) {
607                 
608                                         //save names in old bin
609                                         string names = newList->get(binRemove);
610                 
611                                         //merge bins into name1s bin
612                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
613                                         newList->set(binRemove, "");    
614                                         
615                                         //update binInfo
616                                         while (names.find_first_of(',') != -1) { 
617                                                 //get name from bin
618                                                 string name = names.substr(0,names.find_first_of(','));
619                                                 //save name and bin number
620                                                 binInfo[name] = binKeep;
621                                                 names = names.substr(names.find_first_of(',')+1, names.length());
622                                         }
623                                         
624                                         //get last name
625                                         binInfo[names] = binKeep;
626                                 }
627                                 
628                         }else { done = true; }
629                 }
630                 
631                 //return listvector
632                 return newList;
633                                 
634         }
635         catch(exception& e) {
636                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
637                 exit(1);
638         }
639 }
640 //**********************************************************************************************************************
641 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
642         try {
643                 //sort distFile
644                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
645                 remove(unsortedDist.c_str());  //delete unsorted file
646                 distFile = sortedDistFile;
647                 
648                 //sort overlap file
649                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
650                 remove(unsortedOverlap.c_str());  //delete unsorted file
651                 overlapFile = sortedOverlapFile;
652         }
653         catch(exception& e) {
654                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
655                 exit(1);
656         }
657 }
658
659 //**********************************************************************************************************************
660
661
662
663
664