]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12
13 //**********************************************************************************************************************
14 vector<string> MGClusterCommand::getValidParameters(){  
15         try {
16                 string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "MGClusterCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 MGClusterCommand::MGClusterCommand(){   
27         try {
28                 abort = true; calledHelp = true; 
29                 vector<string> tempOutNames;
30                 outputTypes["list"] = tempOutNames;
31                 outputTypes["rabund"] = tempOutNames;
32                 outputTypes["sabund"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> MGClusterCommand::getRequiredParameters(){       
41         try {
42                 string Array[] =  {"blast"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MGClusterCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> MGClusterCommand::getRequiredFiles(){    
53         try {
54                 vector<string> myArray;
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "MGClusterCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 MGClusterCommand::MGClusterCommand(string option) {
64         try {
65                 globaldata = GlobalData::getInstance();
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string, string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string,string>::iterator it;
81                 
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["list"] = tempOutNames;
90                         outputTypes["rabund"] = tempOutNames;
91                         outputTypes["sabund"] = tempOutNames;
92                         
93                         //if the user changes the input directory command factory will send this info to us in the output parameter 
94                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
95                         if (inputDir == "not found"){   inputDir = "";          }
96                         else {
97                                 string path;
98                                 it = parameters.find("blast");
99                                 //user has given a template file
100                                 if(it != parameters.end()){ 
101                                         path = m->hasPath(it->second);
102                                         //if the user has not given a path then, add inputdir. else leave path alone.
103                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
104                                 }
105                                 
106                                 it = parameters.find("name");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
112                                 }
113                         }
114
115                         
116                         //check for required parameters
117                         blastfile = validParameter.validFile(parameters, "blast", true);
118                         if (blastfile == "not open") { abort = true; }  
119                         else if (blastfile == "not found") { blastfile = ""; }
120                         
121                         //if the user changes the output directory command factory will send this info to us in the output parameter 
122                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
123                                 outputDir = ""; 
124                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
125                         }
126                         
127                         namefile = validParameter.validFile(parameters, "name", true);
128                         if (namefile == "not open") { abort = true; }   
129                         else if (namefile == "not found") { namefile = ""; }
130                         
131                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
132                         
133                         //check for optional parameter and set defaults
134                         string temp;
135                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
136                         precisionLength = temp.length();
137                         convert(temp, precision); 
138                         
139                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
140                         convert(temp, cutoff); 
141                         cutoff += (5 / (precision * 10.0));
142                         
143                         method = validParameter.validFile(parameters, "method", false);
144                         if (method == "not found") { method = "furthest"; }
145                         
146                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
147                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
148
149                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
150                         convert(temp, length); 
151                         
152                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
153                         convert(temp, penalty); 
154                         
155                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
156                         minWanted = m->isTrue(temp); 
157                         
158                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
159                         merge = m->isTrue(temp); 
160                         
161                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
162                         hclusterWanted = m->isTrue(temp); 
163                         
164                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
165                         hard = m->isTrue(temp);
166                 }
167
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
171                 exit(1);
172         }
173 }
174 //**********************************************************************************************************************
175
176 void MGClusterCommand::help(){
177         try {
178                 m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
179                 m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
180                 m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
181                 m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
182                 m->mothurOut("The precision parameter's default value is 100. \n");
183                 m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
184                 m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
185                 m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
186                 m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
187                 m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
188                 m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
189                 m->mothurOut("The mgcluster command should be in the following format: \n");
190                 m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
191                 m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "MGClusterCommand", "help");
195                 exit(1);
196         }
197 }
198 //**********************************************************************************************************************
199 MGClusterCommand::~MGClusterCommand(){}
200 //**********************************************************************************************************************
201 int MGClusterCommand::execute(){
202         try {
203                 
204                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
205                 
206                 //read names file
207                 if (namefile != "") {
208                         nameMap = new NameAssignment(namefile);
209                         nameMap->readMap();
210                 }else{ nameMap= new NameAssignment(); }
211                 
212                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
213                 string tag = "";
214                 time_t start;
215                 float previousDist = 0.00000;
216                 float rndPreviousDist = 0.00000;
217                 
218                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
219                 //must remember to delete those objects here since readBlast does not
220                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
221                 read->read(nameMap);
222                 
223                 list = new ListVector(nameMap->getListVector());
224                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
225                 
226                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
227                 
228                 start = time(NULL);
229                 oldList = *list;
230                 map<string, int> Seq2Bin;
231                 map<string, int> oldSeq2Bin;
232                 
233                 if (method == "furthest")               { tag = "fn";  }
234                 else if (method == "nearest")   { tag = "nn";  }
235                 else                                                    { tag = "an";  }
236                 
237                 //open output files
238                 m->openOutputFile(fileroot+ tag + ".list",  listFile);
239                 m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
240                 m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
241                 
242                 if (m->control_pressed) { 
243                         delete nameMap; delete read; delete list; delete rabund; 
244                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
245                         outputTypes.clear();
246                         return 0; 
247                 }
248                 
249                 if (!hclusterWanted) {
250                         //get distmatrix and overlap
251                         SparseMatrix* distMatrix = read->getDistMatrix();
252                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
253                         delete read;
254                 
255                         //create cluster
256                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
257                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
258                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
259                         cluster->setMapWanted(true);
260                         Seq2Bin = cluster->getSeqtoBin();
261                         oldSeq2Bin = Seq2Bin;
262                         
263                         if (m->control_pressed) { 
264                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
265                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
266                                 outputTypes.clear();
267                                 return 0; 
268                         }
269                 
270                         //cluster using cluster classes
271                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
272                                 
273                                 cluster->update(cutoff);
274                                 
275                                 if (m->control_pressed) { 
276                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
277                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
278                                         outputTypes.clear();
279                                         return 0; 
280                                 }
281                                 
282                                 float dist = distMatrix->getSmallDist();
283                                 float rndDist;
284                                 if (hard) {
285                                         rndDist = m->ceilDist(dist, precision); 
286                                 }else{
287                                         rndDist = m->roundDist(dist, precision); 
288                                 }
289                                 
290                                 if(previousDist <= 0.0000 && dist != previousDist){
291                                         oldList.setLabel("unique");
292                                         printData(&oldList);
293                                 }
294                                 else if(rndDist != rndPreviousDist){
295                                         if (merge) {
296                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
297                                                 
298                                                 if (m->control_pressed) { 
299                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
300                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
301                                                         outputTypes.clear();
302                                                         return 0; 
303                                                 }
304                                                 
305                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
306                                                 printData(temp);
307                                                 delete temp;
308                                         }else{
309                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
310                                                 printData(&oldList);
311                                         }
312                                 }
313         
314                                 previousDist = dist;
315                                 rndPreviousDist = rndDist;
316                                 oldList = *list;
317                                 Seq2Bin = cluster->getSeqtoBin();
318                                 oldSeq2Bin = Seq2Bin;
319                         }
320                         
321                         if(previousDist <= 0.0000){
322                                 oldList.setLabel("unique");
323                                 printData(&oldList);
324                         }
325                         else if(rndPreviousDist<cutoff){
326                                 if (merge) {
327                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
328                                         
329                                         if (m->control_pressed) { 
330                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
331                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
332                                                         outputTypes.clear();
333                                                         return 0; 
334                                         }
335                                         
336                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
337                                         printData(temp);
338                                         delete temp;
339                                 }else{
340                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
341                                         printData(&oldList);
342                                 }
343                         }
344                         
345                         //free memory
346                         overlapMatrix.clear();
347                         delete distMatrix;
348                         delete cluster;
349                         
350                 }else { //use hcluster to cluster
351                         //get distmatrix and overlap
352                         overlapFile = read->getOverlapFile();
353                         distFile = read->getDistFile(); 
354                         delete read;
355                 
356                         //sort the distance and overlap files
357                         sortHclusterFiles(distFile, overlapFile);
358                         
359                         if (m->control_pressed) { 
360                                 delete nameMap;  delete list; delete rabund; 
361                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
362                                 outputTypes.clear();
363                                 return 0; 
364                         }
365                 
366                         //create cluster
367                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
368                         hcluster->setMapWanted(true);
369                         Seq2Bin = cluster->getSeqtoBin();
370                         oldSeq2Bin = Seq2Bin;
371                         
372                         vector<seqDist> seqs; seqs.resize(1); // to start loop
373                         //ifstream inHcluster;
374                         //m->openInputFile(distFile, inHcluster);
375                         
376                         if (m->control_pressed) { 
377                                 delete nameMap;  delete list; delete rabund; delete hcluster;
378                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
379                                 outputTypes.clear();
380                                 return 0; 
381                         }
382
383                         while (seqs.size() != 0){
384                 
385                                 seqs = hcluster->getSeqs();
386                                 
387                                 if (m->control_pressed) { 
388                                         delete nameMap;  delete list; delete rabund; delete hcluster;
389                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
390                                         remove(distFile.c_str());
391                                         remove(overlapFile.c_str());
392                                         outputTypes.clear();
393                                         return 0; 
394                                 }
395                                 
396                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
397                                         
398                                         if (seqs[i].seq1 != seqs[i].seq2) {
399                 
400                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
401                                                 
402                                                 if (m->control_pressed) { 
403                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
404                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
405                                                         remove(distFile.c_str());
406                                                         remove(overlapFile.c_str());
407                                                         outputTypes.clear();
408                                                         return 0; 
409                                                 }
410         
411                                                 float rndDist;
412                                                 if (hard) {
413                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
414                                                 }else{
415                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
416                                                 }
417                                                                                                 
418                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
419                                                         oldList.setLabel("unique");
420                                                         printData(&oldList);
421                                                 }
422                                                 else if((rndDist != rndPreviousDist)){
423                                                         if (merge) {
424                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
425                                                                 
426                                                                 if (m->control_pressed) { 
427                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
428                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
429                                                                         remove(distFile.c_str());
430                                                                         remove(overlapFile.c_str());
431                                                                         outputTypes.clear();
432                                                                         return 0; 
433                                                                 }
434
435                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
436                                                                 printData(temp);
437                                                                 delete temp;
438                                                         }else{
439                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
440                                                                 printData(&oldList);
441                                                         }
442                                                 }
443                                                 
444                                                 previousDist = seqs[i].dist;
445                                                 rndPreviousDist = rndDist;
446                                                 oldList = *list;
447                                                 Seq2Bin = cluster->getSeqtoBin();
448                                                 oldSeq2Bin = Seq2Bin;
449                                         }
450                                 }
451                         }
452                         //inHcluster.close();
453                         
454                         if(previousDist <= 0.0000){
455                                 oldList.setLabel("unique");
456                                 printData(&oldList);
457                         }
458                         else if(rndPreviousDist<cutoff){
459                                 if (merge) {
460                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
461                                         
462                                         if (m->control_pressed) { 
463                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
464                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
465                                                         remove(distFile.c_str());
466                                                         remove(overlapFile.c_str());
467                                                         outputTypes.clear();
468                                                         return 0; 
469                                         }
470                                         
471                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
472                                         printData(temp);
473                                         delete temp;
474                                 }else{
475                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
476                                         printData(&oldList);
477                                 }
478                         }
479                         
480                         delete hcluster;
481                         remove(distFile.c_str());
482                         remove(overlapFile.c_str());
483                 }
484                 
485                 delete list; 
486                 delete rabund;
487                 listFile.close();
488                 sabundFile.close();
489                 rabundFile.close();
490         
491                 globaldata->setListFile(fileroot+ tag + ".list");
492                 globaldata->setFormat("list");
493                 
494                 if (m->control_pressed) { 
495                         delete nameMap; 
496                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
497                         globaldata->setListFile("");
498                         globaldata->setFormat("");
499                         outputTypes.clear();
500                         return 0; 
501                 }
502                 
503                 m->mothurOutEndLine();
504                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
505                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
506                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
507                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
508                 m->mothurOutEndLine();
509                 
510                 //set list file as new current listfile
511                 string current = "";
512                 itTypes = outputTypes.find("list");
513                 if (itTypes != outputTypes.end()) {
514                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
515                 }
516                 
517                 //set rabund file as new current rabundfile
518                 itTypes = outputTypes.find("rabund");
519                 if (itTypes != outputTypes.end()) {
520                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
521                 }
522                 
523                 //set sabund file as new current sabundfile
524                 itTypes = outputTypes.find("sabund");
525                 if (itTypes != outputTypes.end()) {
526                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
527                 }
528                 
529                 
530                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
531                         
532                 return 0;
533         }
534         catch(exception& e) {
535                 m->errorOut(e, "MGClusterCommand", "execute");
536                 exit(1);
537         }
538 }
539 //**********************************************************************************************************************
540 void MGClusterCommand::printData(ListVector* mergedList){
541         try {
542                 mergedList->print(listFile);
543                 mergedList->getRAbundVector().print(rabundFile);
544                 
545                 SAbundVector sabund = mergedList->getSAbundVector();
546
547                 sabund.print(cout);
548                 sabund.print(sabundFile);
549         }
550         catch(exception& e) {
551                 m->errorOut(e, "MGClusterCommand", "printData");
552                 exit(1);
553         }
554 }
555 //**********************************************************************************************************************
556 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
557 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
558 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
559         try {
560                 //create new listvector so you don't overwrite the clustering
561                 ListVector* newList = new ListVector(oldList);
562
563                 bool done = false;
564                 ifstream inOverlap;
565                 int count = 0;
566                 
567                 if (hclusterWanted) {  
568                         m->openInputFile(overlapFile, inOverlap);  
569                         if (inOverlap.eof()) {  done = true;  }
570                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
571                 
572                 while (!done) {
573                         if (m->control_pressed) { 
574                                 if (hclusterWanted) {   inOverlap.close();  }           
575                                 return newList;
576                         }
577                         
578                         //get next overlap
579                         seqDist overlapNode;
580                         if (!hclusterWanted) {  
581                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
582                                         overlapNode = overlapMatrix[count];
583                                         count++;
584                                 }else { break; }
585                         }else { 
586                                 if (!inOverlap.eof()) {
587                                         string firstName, secondName;
588                                         float overlapDistance;
589                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
590                                         
591                                         //commented out because we check this in readblast already
592                                         //map<string,int>::iterator itA = nameMap->find(firstName);
593                                         //map<string,int>::iterator itB = nameMap->find(secondName);
594                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
595                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
596                                         
597                                         //overlapNode.seq1 = itA->second;
598                                         //overlapNode.seq2 = itB->second;
599                                         overlapNode.seq1 = nameMap->get(firstName);
600                                         overlapNode.seq2 = nameMap->get(secondName);
601                                         overlapNode.dist = overlapDistance;
602                                 }else { inOverlap.close(); break; }
603                         } 
604                 
605                         if (overlapNode.dist < dist) {
606                                 //get names of seqs that overlap
607                                 string name1 = nameMap->get(overlapNode.seq1);
608                                 string name2 = nameMap->get(overlapNode.seq2);
609                         
610                                 //use binInfo to find out if they are already in the same bin
611                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
612                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
613                                 
614                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
615                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
616
617                                 //int binKeep = itBin1->second;
618                                 //int binRemove = itBin2->second;
619                                 
620                                 int binKeep = binInfo[name1];
621                                 int binRemove = binInfo[name2];
622                         
623                                 //if not merge bins and update binInfo
624                                 if(binKeep != binRemove) {
625                 
626                                         //save names in old bin
627                                         string names = newList->get(binRemove);
628                 
629                                         //merge bins into name1s bin
630                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
631                                         newList->set(binRemove, "");    
632                                         
633                                         //update binInfo
634                                         while (names.find_first_of(',') != -1) { 
635                                                 //get name from bin
636                                                 string name = names.substr(0,names.find_first_of(','));
637                                                 //save name and bin number
638                                                 binInfo[name] = binKeep;
639                                                 names = names.substr(names.find_first_of(',')+1, names.length());
640                                         }
641                                         
642                                         //get last name
643                                         binInfo[names] = binKeep;
644                                 }
645                                 
646                         }else { done = true; }
647                 }
648                 
649                 //return listvector
650                 return newList;
651                                 
652         }
653         catch(exception& e) {
654                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
655                 exit(1);
656         }
657 }
658 //**********************************************************************************************************************
659 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
660         try {
661                 //sort distFile
662                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
663                 remove(unsortedDist.c_str());  //delete unsorted file
664                 distFile = sortedDistFile;
665                 
666                 //sort overlap file
667                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
668                 remove(unsortedOverlap.c_str());  //delete unsorted file
669                 overlapFile = sortedOverlapFile;
670         }
671         catch(exception& e) {
672                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
673                 exit(1);
674         }
675 }
676
677 //**********************************************************************************************************************
678
679
680
681
682