]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added citation function to commands
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MGClusterCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MGClusterCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
44                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
45                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46                 helpString += "The precision parameter's default value is 100. \n";
47                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
48                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
50                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
51                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
52                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
53                 helpString += "The mgcluster command should be in the following format: \n";
54                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MGClusterCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 MGClusterCommand::MGClusterCommand(){   
65         try {
66                 abort = true; calledHelp = true; 
67                 setParameters();
68                 vector<string> tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["rabund"] = tempOutNames;
71                 outputTypes["sabund"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 MGClusterCommand::MGClusterCommand(string option) {
80         try {
81                 abort = false; calledHelp = false;   
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string, string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                 
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["list"] = tempOutNames;
104                         outputTypes["rabund"] = tempOutNames;
105                         outputTypes["sabund"] = tempOutNames;
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("blast");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
118                                 }
119                                 
120                                 it = parameters.find("name");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
126                                 }
127                         }
128
129                         
130                         //check for required parameters
131                         blastfile = validParameter.validFile(parameters, "blast", true);
132                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
133                         else if (blastfile == "not found") { blastfile = ""; }
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
137                                 outputDir = ""; 
138                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
139                         }
140                         
141                         namefile = validParameter.validFile(parameters, "name", true);
142                         if (namefile == "not open") { abort = true; }   
143                         else if (namefile == "not found") { namefile = ""; }
144                         
145                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
146                         
147                         //check for optional parameter and set defaults
148                         string temp;
149                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
150                         precisionLength = temp.length();
151                         convert(temp, precision); 
152                         
153                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
154                         convert(temp, cutoff); 
155                         cutoff += (5 / (precision * 10.0));
156                         
157                         method = validParameter.validFile(parameters, "method", false);
158                         if (method == "not found") { method = "average"; }
159                         
160                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
161                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
162
163                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
164                         convert(temp, length); 
165                         
166                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
167                         convert(temp, penalty); 
168                         
169                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
170                         minWanted = m->isTrue(temp); 
171                         
172                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
173                         merge = m->isTrue(temp); 
174                         
175                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
176                         hclusterWanted = m->isTrue(temp); 
177                         
178                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
179                         hard = m->isTrue(temp);
180                 }
181
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
185                 exit(1);
186         }
187 }
188 //**********************************************************************************************************************
189 int MGClusterCommand::execute(){
190         try {
191                 
192                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
193                 
194                 //read names file
195                 if (namefile != "") {
196                         nameMap = new NameAssignment(namefile);
197                         nameMap->readMap();
198                 }else{ nameMap= new NameAssignment(); }
199                 
200                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
201                 string tag = "";
202                 time_t start;
203                 float previousDist = 0.00000;
204                 float rndPreviousDist = 0.00000;
205                 
206                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
207                 //must remember to delete those objects here since readBlast does not
208                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
209                 read->read(nameMap);
210                 
211                 list = new ListVector(nameMap->getListVector());
212                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
213                 
214                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
215                 
216                 start = time(NULL);
217                 oldList = *list;
218                 map<string, int> Seq2Bin;
219                 map<string, int> oldSeq2Bin;
220                 
221                 if (method == "furthest")               { tag = "fn";  }
222                 else if (method == "nearest")   { tag = "nn";  }
223                 else                                                    { tag = "an";  }
224                 
225                 //open output files
226                 m->openOutputFile(fileroot+ tag + ".list",  listFile);
227                 m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
228                 m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
229                 
230                 if (m->control_pressed) { 
231                         delete nameMap; delete read; delete list; delete rabund; 
232                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
233                         outputTypes.clear();
234                         return 0; 
235                 }
236                 
237                 if (!hclusterWanted) {
238                         //get distmatrix and overlap
239                         SparseMatrix* distMatrix = read->getDistMatrix();
240                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
241                         delete read;
242                 
243                         //create cluster
244                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
245                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
246                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
247                         cluster->setMapWanted(true);
248                         Seq2Bin = cluster->getSeqtoBin();
249                         oldSeq2Bin = Seq2Bin;
250                         
251                         if (m->control_pressed) { 
252                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
253                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
254                                 outputTypes.clear();
255                                 return 0; 
256                         }
257                 
258                         //cluster using cluster classes
259                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
260                                 
261                                 cluster->update(cutoff);
262                                 
263                                 if (m->control_pressed) { 
264                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
265                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
266                                         outputTypes.clear();
267                                         return 0; 
268                                 }
269                                 
270                                 float dist = distMatrix->getSmallDist();
271                                 float rndDist;
272                                 if (hard) {
273                                         rndDist = m->ceilDist(dist, precision); 
274                                 }else{
275                                         rndDist = m->roundDist(dist, precision); 
276                                 }
277                                 
278                                 if(previousDist <= 0.0000 && dist != previousDist){
279                                         oldList.setLabel("unique");
280                                         printData(&oldList);
281                                 }
282                                 else if(rndDist != rndPreviousDist){
283                                         if (merge) {
284                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
285                                                 
286                                                 if (m->control_pressed) { 
287                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
288                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
289                                                         outputTypes.clear();
290                                                         return 0; 
291                                                 }
292                                                 
293                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
294                                                 printData(temp);
295                                                 delete temp;
296                                         }else{
297                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
298                                                 printData(&oldList);
299                                         }
300                                 }
301         
302                                 previousDist = dist;
303                                 rndPreviousDist = rndDist;
304                                 oldList = *list;
305                                 Seq2Bin = cluster->getSeqtoBin();
306                                 oldSeq2Bin = Seq2Bin;
307                         }
308                         
309                         if(previousDist <= 0.0000){
310                                 oldList.setLabel("unique");
311                                 printData(&oldList);
312                         }
313                         else if(rndPreviousDist<cutoff){
314                                 if (merge) {
315                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
316                                         
317                                         if (m->control_pressed) { 
318                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
319                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
320                                                         outputTypes.clear();
321                                                         return 0; 
322                                         }
323                                         
324                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
325                                         printData(temp);
326                                         delete temp;
327                                 }else{
328                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
329                                         printData(&oldList);
330                                 }
331                         }
332                         
333                         //free memory
334                         overlapMatrix.clear();
335                         delete distMatrix;
336                         delete cluster;
337                         
338                 }else { //use hcluster to cluster
339                         //get distmatrix and overlap
340                         overlapFile = read->getOverlapFile();
341                         distFile = read->getDistFile(); 
342                         delete read;
343                 
344                         //sort the distance and overlap files
345                         sortHclusterFiles(distFile, overlapFile);
346                         
347                         if (m->control_pressed) { 
348                                 delete nameMap;  delete list; delete rabund; 
349                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
350                                 outputTypes.clear();
351                                 return 0; 
352                         }
353                 
354                         //create cluster
355                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
356                         hcluster->setMapWanted(true);
357                         Seq2Bin = cluster->getSeqtoBin();
358                         oldSeq2Bin = Seq2Bin;
359                         
360                         vector<seqDist> seqs; seqs.resize(1); // to start loop
361                         //ifstream inHcluster;
362                         //m->openInputFile(distFile, inHcluster);
363                         
364                         if (m->control_pressed) { 
365                                 delete nameMap;  delete list; delete rabund; delete hcluster;
366                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
367                                 outputTypes.clear();
368                                 return 0; 
369                         }
370
371                         while (seqs.size() != 0){
372                 
373                                 seqs = hcluster->getSeqs();
374                                 
375                                 if (m->control_pressed) { 
376                                         delete nameMap;  delete list; delete rabund; delete hcluster;
377                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
378                                         remove(distFile.c_str());
379                                         remove(overlapFile.c_str());
380                                         outputTypes.clear();
381                                         return 0; 
382                                 }
383                                 
384                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
385                                         
386                                         if (seqs[i].seq1 != seqs[i].seq2) {
387                 
388                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
389                                                 
390                                                 if (m->control_pressed) { 
391                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
392                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
393                                                         remove(distFile.c_str());
394                                                         remove(overlapFile.c_str());
395                                                         outputTypes.clear();
396                                                         return 0; 
397                                                 }
398         
399                                                 float rndDist;
400                                                 if (hard) {
401                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
402                                                 }else{
403                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
404                                                 }
405                                                                                                 
406                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
407                                                         oldList.setLabel("unique");
408                                                         printData(&oldList);
409                                                 }
410                                                 else if((rndDist != rndPreviousDist)){
411                                                         if (merge) {
412                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
413                                                                 
414                                                                 if (m->control_pressed) { 
415                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
416                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
417                                                                         remove(distFile.c_str());
418                                                                         remove(overlapFile.c_str());
419                                                                         outputTypes.clear();
420                                                                         return 0; 
421                                                                 }
422
423                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
424                                                                 printData(temp);
425                                                                 delete temp;
426                                                         }else{
427                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
428                                                                 printData(&oldList);
429                                                         }
430                                                 }
431                                                 
432                                                 previousDist = seqs[i].dist;
433                                                 rndPreviousDist = rndDist;
434                                                 oldList = *list;
435                                                 Seq2Bin = cluster->getSeqtoBin();
436                                                 oldSeq2Bin = Seq2Bin;
437                                         }
438                                 }
439                         }
440                         //inHcluster.close();
441                         
442                         if(previousDist <= 0.0000){
443                                 oldList.setLabel("unique");
444                                 printData(&oldList);
445                         }
446                         else if(rndPreviousDist<cutoff){
447                                 if (merge) {
448                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
449                                         
450                                         if (m->control_pressed) { 
451                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
452                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
453                                                         remove(distFile.c_str());
454                                                         remove(overlapFile.c_str());
455                                                         outputTypes.clear();
456                                                         return 0; 
457                                         }
458                                         
459                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
460                                         printData(temp);
461                                         delete temp;
462                                 }else{
463                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
464                                         printData(&oldList);
465                                 }
466                         }
467                         
468                         delete hcluster;
469                         remove(distFile.c_str());
470                         remove(overlapFile.c_str());
471                 }
472                 
473                 delete list; 
474                 delete rabund;
475                 listFile.close();
476                 sabundFile.close();
477                 rabundFile.close();
478         
479                 if (m->control_pressed) { 
480                         delete nameMap; 
481                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
482                         outputTypes.clear();
483                         return 0; 
484                 }
485                 
486                 m->mothurOutEndLine();
487                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
488                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
489                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
490                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
491                 m->mothurOutEndLine();
492                 
493                 //set list file as new current listfile
494                 string current = "";
495                 itTypes = outputTypes.find("list");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
498                 }
499                 
500                 //set rabund file as new current rabundfile
501                 itTypes = outputTypes.find("rabund");
502                 if (itTypes != outputTypes.end()) {
503                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
504                 }
505                 
506                 //set sabund file as new current sabundfile
507                 itTypes = outputTypes.find("sabund");
508                 if (itTypes != outputTypes.end()) {
509                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
510                 }
511                 
512                 
513                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
514                         
515                 return 0;
516         }
517         catch(exception& e) {
518                 m->errorOut(e, "MGClusterCommand", "execute");
519                 exit(1);
520         }
521 }
522 //**********************************************************************************************************************
523 void MGClusterCommand::printData(ListVector* mergedList){
524         try {
525                 mergedList->print(listFile);
526                 mergedList->getRAbundVector().print(rabundFile);
527                 
528                 SAbundVector sabund = mergedList->getSAbundVector();
529
530                 sabund.print(cout);
531                 sabund.print(sabundFile);
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "MGClusterCommand", "printData");
535                 exit(1);
536         }
537 }
538 //**********************************************************************************************************************
539 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
540 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
541 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
542         try {
543                 //create new listvector so you don't overwrite the clustering
544                 ListVector* newList = new ListVector(oldList);
545
546                 bool done = false;
547                 ifstream inOverlap;
548                 int count = 0;
549                 
550                 if (hclusterWanted) {  
551                         m->openInputFile(overlapFile, inOverlap);  
552                         if (inOverlap.eof()) {  done = true;  }
553                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
554                 
555                 while (!done) {
556                         if (m->control_pressed) { 
557                                 if (hclusterWanted) {   inOverlap.close();  }           
558                                 return newList;
559                         }
560                         
561                         //get next overlap
562                         seqDist overlapNode;
563                         if (!hclusterWanted) {  
564                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
565                                         overlapNode = overlapMatrix[count];
566                                         count++;
567                                 }else { break; }
568                         }else { 
569                                 if (!inOverlap.eof()) {
570                                         string firstName, secondName;
571                                         float overlapDistance;
572                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
573                                         
574                                         //commented out because we check this in readblast already
575                                         //map<string,int>::iterator itA = nameMap->find(firstName);
576                                         //map<string,int>::iterator itB = nameMap->find(secondName);
577                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
578                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
579                                         
580                                         //overlapNode.seq1 = itA->second;
581                                         //overlapNode.seq2 = itB->second;
582                                         overlapNode.seq1 = nameMap->get(firstName);
583                                         overlapNode.seq2 = nameMap->get(secondName);
584                                         overlapNode.dist = overlapDistance;
585                                 }else { inOverlap.close(); break; }
586                         } 
587                 
588                         if (overlapNode.dist < dist) {
589                                 //get names of seqs that overlap
590                                 string name1 = nameMap->get(overlapNode.seq1);
591                                 string name2 = nameMap->get(overlapNode.seq2);
592                         
593                                 //use binInfo to find out if they are already in the same bin
594                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
595                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
596                                 
597                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
598                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
599
600                                 //int binKeep = itBin1->second;
601                                 //int binRemove = itBin2->second;
602                                 
603                                 int binKeep = binInfo[name1];
604                                 int binRemove = binInfo[name2];
605                         
606                                 //if not merge bins and update binInfo
607                                 if(binKeep != binRemove) {
608                 
609                                         //save names in old bin
610                                         string names = newList->get(binRemove);
611                 
612                                         //merge bins into name1s bin
613                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
614                                         newList->set(binRemove, "");    
615                                         
616                                         //update binInfo
617                                         while (names.find_first_of(',') != -1) { 
618                                                 //get name from bin
619                                                 string name = names.substr(0,names.find_first_of(','));
620                                                 //save name and bin number
621                                                 binInfo[name] = binKeep;
622                                                 names = names.substr(names.find_first_of(',')+1, names.length());
623                                         }
624                                         
625                                         //get last name
626                                         binInfo[names] = binKeep;
627                                 }
628                                 
629                         }else { done = true; }
630                 }
631                 
632                 //return listvector
633                 return newList;
634                                 
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
638                 exit(1);
639         }
640 }
641 //**********************************************************************************************************************
642 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
643         try {
644                 //sort distFile
645                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
646                 remove(unsortedDist.c_str());  //delete unsorted file
647                 distFile = sortedDistFile;
648                 
649                 //sort overlap file
650                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
651                 remove(unsortedOverlap.c_str());  //delete unsorted file
652                 overlapFile = sortedOverlapFile;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
656                 exit(1);
657         }
658 }
659
660 //**********************************************************************************************************************
661
662
663
664
665