]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> MGClusterCommand::setParameters(){       
14         try {
15                 CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17                 CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
18                 CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
19                 CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
22                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
24                 CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
25                 CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MGClusterCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string MGClusterCommand::getHelpString(){       
40         try {
41                 string helpString = "";
42                 helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
43                 helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
44                 helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
45                 helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
46                 helpString += "The precision parameter's default value is 100. \n";
47                 helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
48                 helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
49                 helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
50                 helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
51                 helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
52                 helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
53                 helpString += "The mgcluster command should be in the following format: \n";
54                 helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "MGClusterCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){        
65         try {
66         string outputFileName = "";
67                 map<string, vector<string> >::iterator it;
68         
69         //is this a type this command creates
70         it = outputTypes.find(type);
71         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
72         else {
73             if (type == "list") {  outputFileName =  "list"; }
74             else if (type == "rabund") {  outputFileName =  "rabund"; }
75             else if (type == "sabund") {  outputFileName =  "sabund"; }
76             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
77         }
78         return outputFileName;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 MGClusterCommand::MGClusterCommand(){   
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["list"] = tempOutNames;
92                 outputTypes["rabund"] = tempOutNames;
93                 outputTypes["sabund"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 MGClusterCommand::MGClusterCommand(string option) {
102         try {
103                 abort = false; calledHelp = false;   
104                 
105                 //allow user to run help
106                 if(option == "help") { help(); abort = true; calledHelp = true; }
107                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
108                 
109                 else {
110                         vector<string> myArray = setParameters();
111                         
112                         OptionParser parser(option);
113                         map<string, string> parameters = parser.getParameters();
114                         
115                         ValidParameters validParameter;
116                         map<string,string>::iterator it;
117                 
118                         //check to make sure all parameters are valid for command
119                         for (it = parameters.begin(); it != parameters.end(); it++) { 
120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
121                         }
122                         
123                         //initialize outputTypes
124                         vector<string> tempOutNames;
125                         outputTypes["list"] = tempOutNames;
126                         outputTypes["rabund"] = tempOutNames;
127                         outputTypes["sabund"] = tempOutNames;
128                         
129                         //if the user changes the input directory command factory will send this info to us in the output parameter 
130                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
131                         if (inputDir == "not found"){   inputDir = "";          }
132                         else {
133                                 string path;
134                                 it = parameters.find("blast");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
140                                 }
141                                 
142                                 it = parameters.find("name");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
148                                 }
149                         }
150
151                         
152                         //check for required parameters
153                         blastfile = validParameter.validFile(parameters, "blast", true);
154                         if (blastfile == "not open") { blastfile = ""; abort = true; }  
155                         else if (blastfile == "not found") { blastfile = ""; }
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
159                                 outputDir = ""; 
160                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
161                         }
162                         
163                         namefile = validParameter.validFile(parameters, "name", true);
164                         if (namefile == "not open") { abort = true; }   
165                         else if (namefile == "not found") { namefile = ""; }
166                         else { m->setNameFile(namefile); }
167                         
168                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
169                         
170                         //check for optional parameter and set defaults
171                         string temp;
172                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
173                         precisionLength = temp.length();
174                         m->mothurConvert(temp, precision); 
175                         
176                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
177                         m->mothurConvert(temp, cutoff); 
178                         cutoff += (5 / (precision * 10.0));
179                         
180                         method = validParameter.validFile(parameters, "method", false);
181                         if (method == "not found") { method = "average"; }
182                         
183                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
184                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
185
186                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
187                         m->mothurConvert(temp, length); 
188                         
189                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
190                         m->mothurConvert(temp, penalty); 
191                         
192                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
193                         minWanted = m->isTrue(temp); 
194                         
195                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
196                         merge = m->isTrue(temp); 
197                         
198                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
199                         hclusterWanted = m->isTrue(temp); 
200                         
201                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
202                         hard = m->isTrue(temp);
203                 }
204
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
208                 exit(1);
209         }
210 }
211 //**********************************************************************************************************************
212 int MGClusterCommand::execute(){
213         try {
214                 
215                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
216                 
217                 //read names file
218                 if (namefile != "") {
219                         nameMap = new NameAssignment(namefile);
220                         nameMap->readMap();
221                 }else{ nameMap= new NameAssignment(); }
222                 
223                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
224                 string tag = "";
225                 time_t start;
226                 float previousDist = 0.00000;
227                 float rndPreviousDist = 0.00000;
228                 
229                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
230                 //must remember to delete those objects here since readBlast does not
231                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
232                 read->read(nameMap);
233                 
234                 list = new ListVector(nameMap->getListVector());
235                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
236                 
237                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
238                 
239                 start = time(NULL);
240                 oldList = *list;
241                 map<string, int> Seq2Bin;
242                 map<string, int> oldSeq2Bin;
243                 
244                 if (method == "furthest")               { tag = "fn";  }
245                 else if (method == "nearest")   { tag = "nn";  }
246                 else                                                    { tag = "an";  }
247                 
248         string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
249         string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
250         string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
251         
252                 m->openOutputFile(sabundFileName,       sabundFile);
253                 m->openOutputFile(rabundFileName,       rabundFile);
254                 m->openOutputFile(listFileName, listFile);
255                 
256                 if (m->control_pressed) { 
257                         delete nameMap; delete read; delete list; delete rabund; 
258                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
259                         outputTypes.clear();
260                         return 0; 
261                 }
262                 
263                 double saveCutoff = cutoff;
264                 
265                 if (!hclusterWanted) {
266                         //get distmatrix and overlap
267                         SparseMatrix* distMatrix = read->getDistMatrix();
268                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
269                         delete read;
270                 
271                         //create cluster
272                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
273                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
274                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
275                         cluster->setMapWanted(true);
276                         Seq2Bin = cluster->getSeqtoBin();
277                         oldSeq2Bin = Seq2Bin;
278                         
279                         if (m->control_pressed) { 
280                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
281                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
282                                 outputTypes.clear();
283                                 return 0; 
284                         }
285                 
286                         //cluster using cluster classes
287                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
288                                 
289                                 cluster->update(cutoff);
290                                 
291                                 if (m->control_pressed) { 
292                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
293                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
294                                         outputTypes.clear();
295                                         return 0; 
296                                 }
297                                 
298                                 float dist = distMatrix->getSmallDist();
299                                 float rndDist;
300                                 if (hard) {
301                                         rndDist = m->ceilDist(dist, precision); 
302                                 }else{
303                                         rndDist = m->roundDist(dist, precision); 
304                                 }
305                                 
306                                 if(previousDist <= 0.0000 && dist != previousDist){
307                                         oldList.setLabel("unique");
308                                         printData(&oldList);
309                                 }
310                                 else if(rndDist != rndPreviousDist){
311                                         if (merge) {
312                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
313                                                 
314                                                 if (m->control_pressed) { 
315                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
316                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
317                                                         outputTypes.clear();
318                                                         return 0; 
319                                                 }
320                                                 
321                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
322                                                 printData(temp);
323                                                 delete temp;
324                                         }else{
325                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
326                                                 printData(&oldList);
327                                         }
328                                 }
329         
330                                 previousDist = dist;
331                                 rndPreviousDist = rndDist;
332                                 oldList = *list;
333                                 Seq2Bin = cluster->getSeqtoBin();
334                                 oldSeq2Bin = Seq2Bin;
335                         }
336                         
337                         if(previousDist <= 0.0000){
338                                 oldList.setLabel("unique");
339                                 printData(&oldList);
340                         }
341                         else if(rndPreviousDist<cutoff){
342                                 if (merge) {
343                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
344                                         
345                                         if (m->control_pressed) { 
346                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
347                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
348                                                         outputTypes.clear();
349                                                         return 0; 
350                                         }
351                                         
352                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
353                                         printData(temp);
354                                         delete temp;
355                                 }else{
356                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
357                                         printData(&oldList);
358                                 }
359                         }
360                         
361                         //free memory
362                         overlapMatrix.clear();
363                         delete distMatrix;
364                         delete cluster;
365                         
366                 }else { //use hcluster to cluster
367                         //get distmatrix and overlap
368                         overlapFile = read->getOverlapFile();
369                         distFile = read->getDistFile(); 
370                         delete read;
371                 
372                         //sort the distance and overlap files
373                         sortHclusterFiles(distFile, overlapFile);
374                         
375                         if (m->control_pressed) { 
376                                 delete nameMap;  delete list; delete rabund; 
377                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
378                                 outputTypes.clear();
379                                 return 0; 
380                         }
381                 
382                         //create cluster
383                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
384                         hcluster->setMapWanted(true);
385                         Seq2Bin = cluster->getSeqtoBin();
386                         oldSeq2Bin = Seq2Bin;
387                         
388                         vector<seqDist> seqs; seqs.resize(1); // to start loop
389                         //ifstream inHcluster;
390                         //m->openInputFile(distFile, inHcluster);
391                         
392                         if (m->control_pressed) { 
393                                 delete nameMap;  delete list; delete rabund; delete hcluster;
394                                 listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
395                                 outputTypes.clear();
396                                 return 0; 
397                         }
398
399                         while (seqs.size() != 0){
400                 
401                                 seqs = hcluster->getSeqs();
402                                 
403                                 //to account for cutoff change in average neighbor
404                                 if (seqs.size() != 0) {
405                                         if (seqs[0].dist > cutoff) { break; }
406                                 }
407                                 
408                                 if (m->control_pressed) { 
409                                         delete nameMap;  delete list; delete rabund; delete hcluster;
410                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
411                                         m->mothurRemove(distFile);
412                                         m->mothurRemove(overlapFile);
413                                         outputTypes.clear();
414                                         return 0; 
415                                 }
416                                 
417                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
418                                         
419                                         if (seqs[i].seq1 != seqs[i].seq2) {
420                 
421                                                 cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
422                                                 
423                                                 if (m->control_pressed) { 
424                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
425                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
426                                                         m->mothurRemove(distFile);
427                                                         m->mothurRemove(overlapFile);
428                                                         outputTypes.clear();
429                                                         return 0; 
430                                                 }
431         
432                                                 float rndDist;
433                                                 if (hard) {
434                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
435                                                 }else{
436                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
437                                                 }
438                                                                                                 
439                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
440                                                         oldList.setLabel("unique");
441                                                         printData(&oldList);
442                                                 }
443                                                 else if((rndDist != rndPreviousDist)){
444                                                         if (merge) {
445                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
446                                                                 
447                                                                 if (m->control_pressed) { 
448                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
449                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
450                                                                         m->mothurRemove(distFile);
451                                                                         m->mothurRemove(overlapFile);
452                                                                         outputTypes.clear();
453                                                                         return 0; 
454                                                                 }
455
456                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
457                                                                 printData(temp);
458                                                                 delete temp;
459                                                         }else{
460                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
461                                                                 printData(&oldList);
462                                                         }
463                                                 }
464                                                 
465                                                 previousDist = seqs[i].dist;
466                                                 rndPreviousDist = rndDist;
467                                                 oldList = *list;
468                                                 Seq2Bin = cluster->getSeqtoBin();
469                                                 oldSeq2Bin = Seq2Bin;
470                                         }
471                                 }
472                         }
473                         //inHcluster.close();
474                         
475                         if(previousDist <= 0.0000){
476                                 oldList.setLabel("unique");
477                                 printData(&oldList);
478                         }
479                         else if(rndPreviousDist<cutoff){
480                                 if (merge) {
481                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
482                                         
483                                         if (m->control_pressed) { 
484                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
485                                                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
486                                                         m->mothurRemove(distFile);
487                                                         m->mothurRemove(overlapFile);
488                                                         outputTypes.clear();
489                                                         return 0; 
490                                         }
491                                         
492                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
493                                         printData(temp);
494                                         delete temp;
495                                 }else{
496                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
497                                         printData(&oldList);
498                                 }
499                         }
500                         
501                         delete hcluster;
502                         m->mothurRemove(distFile);
503                         m->mothurRemove(overlapFile);
504                 }
505                 
506                 delete list; 
507                 delete rabund;
508                 listFile.close();
509                 sabundFile.close();
510                 rabundFile.close();
511         
512                 if (m->control_pressed) { 
513                         delete nameMap; 
514                         listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
515                         outputTypes.clear();
516                         return 0; 
517                 }
518                 
519                 m->mothurOutEndLine();
520                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
521                 m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
522                 m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
523                 m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
524                 m->mothurOutEndLine();
525                 
526                 if (saveCutoff != cutoff) { 
527                         if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
528                         else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
529                         
530                         m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
531                 }
532                 
533                 //set list file as new current listfile
534                 string current = "";
535                 itTypes = outputTypes.find("list");
536                 if (itTypes != outputTypes.end()) {
537                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
538                 }
539                 
540                 //set rabund file as new current rabundfile
541                 itTypes = outputTypes.find("rabund");
542                 if (itTypes != outputTypes.end()) {
543                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
544                 }
545                 
546                 //set sabund file as new current sabundfile
547                 itTypes = outputTypes.find("sabund");
548                 if (itTypes != outputTypes.end()) {
549                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
550                 }
551                 
552                 
553                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
554                         
555                 return 0;
556         }
557         catch(exception& e) {
558                 m->errorOut(e, "MGClusterCommand", "execute");
559                 exit(1);
560         }
561 }
562 //**********************************************************************************************************************
563 void MGClusterCommand::printData(ListVector* mergedList){
564         try {
565                 mergedList->print(listFile);
566                 mergedList->getRAbundVector().print(rabundFile);
567                 
568                 SAbundVector sabund = mergedList->getSAbundVector();
569
570                 sabund.print(cout);
571                 sabund.print(sabundFile);
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "MGClusterCommand", "printData");
575                 exit(1);
576         }
577 }
578 //**********************************************************************************************************************
579 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
580 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
581 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
582         try {
583                 //create new listvector so you don't overwrite the clustering
584                 ListVector* newList = new ListVector(oldList);
585
586                 bool done = false;
587                 ifstream inOverlap;
588                 int count = 0;
589                 
590                 if (hclusterWanted) {  
591                         m->openInputFile(overlapFile, inOverlap);  
592                         if (inOverlap.eof()) {  done = true;  }
593                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
594                 
595                 while (!done) {
596                         if (m->control_pressed) { 
597                                 if (hclusterWanted) {   inOverlap.close();  }           
598                                 return newList;
599                         }
600                         
601                         //get next overlap
602                         seqDist overlapNode;
603                         if (!hclusterWanted) {  
604                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
605                                         overlapNode = overlapMatrix[count];
606                                         count++;
607                                 }else { break; }
608                         }else { 
609                                 if (!inOverlap.eof()) {
610                                         string firstName, secondName;
611                                         float overlapDistance;
612                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
613                                         
614                                         //commented out because we check this in readblast already
615                                         //map<string,int>::iterator itA = nameMap->find(firstName);
616                                         //map<string,int>::iterator itB = nameMap->find(secondName);
617                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
618                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
619                                         
620                                         //overlapNode.seq1 = itA->second;
621                                         //overlapNode.seq2 = itB->second;
622                                         overlapNode.seq1 = nameMap->get(firstName);
623                                         overlapNode.seq2 = nameMap->get(secondName);
624                                         overlapNode.dist = overlapDistance;
625                                 }else { inOverlap.close(); break; }
626                         } 
627                 
628                         if (overlapNode.dist < dist) {
629                                 //get names of seqs that overlap
630                                 string name1 = nameMap->get(overlapNode.seq1);
631                                 string name2 = nameMap->get(overlapNode.seq2);
632                         
633                                 //use binInfo to find out if they are already in the same bin
634                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
635                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
636                                 
637                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
638                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
639
640                                 //int binKeep = itBin1->second;
641                                 //int binRemove = itBin2->second;
642                                 
643                                 int binKeep = binInfo[name1];
644                                 int binRemove = binInfo[name2];
645                         
646                                 //if not merge bins and update binInfo
647                                 if(binKeep != binRemove) {
648                 
649                                         //save names in old bin
650                                         string names = newList->get(binRemove);
651                 
652                                         //merge bins into name1s bin
653                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
654                                         newList->set(binRemove, "");    
655                                         
656                                         //update binInfo
657                                         while (names.find_first_of(',') != -1) { 
658                                                 //get name from bin
659                                                 string name = names.substr(0,names.find_first_of(','));
660                                                 //save name and bin number
661                                                 binInfo[name] = binKeep;
662                                                 names = names.substr(names.find_first_of(',')+1, names.length());
663                                         }
664                                         
665                                         //get last name
666                                         binInfo[names] = binKeep;
667                                 }
668                                 
669                         }else { done = true; }
670                 }
671                 
672                 //return listvector
673                 return newList;
674                                 
675         }
676         catch(exception& e) {
677                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
678                 exit(1);
679         }
680 }
681 //**********************************************************************************************************************
682 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
683         try {
684                 //sort distFile
685                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
686                 m->mothurRemove(unsortedDist);  //delete unsorted file
687                 distFile = sortedDistFile;
688                 
689                 //sort overlap file
690                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
691                 m->mothurRemove(unsortedOverlap);  //delete unsorted file
692                 overlapFile = sortedOverlapFile;
693         }
694         catch(exception& e) {
695                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
696                 exit(1);
697         }
698 }
699
700 //**********************************************************************************************************************
701
702
703
704
705