]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12
13 //**********************************************************************************************************************
14 vector<string> MGClusterCommand::getValidParameters(){  
15         try {
16                 string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "MGClusterCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 MGClusterCommand::MGClusterCommand(){   
27         try {
28                 abort = true; calledHelp = true; 
29                 vector<string> tempOutNames;
30                 outputTypes["list"] = tempOutNames;
31                 outputTypes["rabund"] = tempOutNames;
32                 outputTypes["sabund"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> MGClusterCommand::getRequiredParameters(){       
41         try {
42                 string Array[] =  {"blast"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "MGClusterCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> MGClusterCommand::getRequiredFiles(){    
53         try {
54                 vector<string> myArray;
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "MGClusterCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 MGClusterCommand::MGClusterCommand(string option) {
64         try {
65                 globaldata = GlobalData::getInstance();
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
74                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string, string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         map<string,string>::iterator it;
81                 
82                         //check to make sure all parameters are valid for command
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["list"] = tempOutNames;
90                         outputTypes["rabund"] = tempOutNames;
91                         outputTypes["sabund"] = tempOutNames;
92                         
93                         //if the user changes the input directory command factory will send this info to us in the output parameter 
94                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
95                         if (inputDir == "not found"){   inputDir = "";          }
96                         else {
97                                 string path;
98                                 it = parameters.find("blast");
99                                 //user has given a template file
100                                 if(it != parameters.end()){ 
101                                         path = m->hasPath(it->second);
102                                         //if the user has not given a path then, add inputdir. else leave path alone.
103                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
104                                 }
105                                 
106                                 it = parameters.find("name");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
112                                 }
113                         }
114
115                         
116                         //check for required parameters
117                         blastfile = validParameter.validFile(parameters, "blast", true);
118                         if (blastfile == "not open") { abort = true; }  
119                         else if (blastfile == "not found") { blastfile = ""; }
120                         
121                         //if the user changes the output directory command factory will send this info to us in the output parameter 
122                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
123                                 outputDir = ""; 
124                                 outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
125                         }
126                         
127                         namefile = validParameter.validFile(parameters, "name", true);
128                         if (namefile == "not open") { abort = true; }   
129                         else if (namefile == "not found") { namefile = ""; }
130                         
131                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
132                         
133                         //check for optional parameter and set defaults
134                         string temp;
135                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
136                         precisionLength = temp.length();
137                         convert(temp, precision); 
138                         
139                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
140                         convert(temp, cutoff); 
141                         cutoff += (5 / (precision * 10.0));
142                         
143                         method = validParameter.validFile(parameters, "method", false);
144                         if (method == "not found") { method = "furthest"; }
145                         
146                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
147                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
148
149                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
150                         convert(temp, length); 
151                         
152                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
153                         convert(temp, penalty); 
154                         
155                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
156                         minWanted = m->isTrue(temp); 
157                         
158                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
159                         merge = m->isTrue(temp); 
160                         
161                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
162                         hclusterWanted = m->isTrue(temp); 
163                         
164                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
165                         hard = m->isTrue(temp);
166                 }
167
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
171                 exit(1);
172         }
173 }
174 //**********************************************************************************************************************
175
176 void MGClusterCommand::help(){
177         try {
178                 m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
179                 m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
180                 m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
181                 m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
182                 m->mothurOut("The precision parameter's default value is 100. \n");
183                 m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
184                 m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
185                 m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
186                 m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
187                 m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
188                 m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
189                 m->mothurOut("The mgcluster command should be in the following format: \n");
190                 m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
191                 m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "MGClusterCommand", "help");
195                 exit(1);
196         }
197 }
198 //**********************************************************************************************************************
199 MGClusterCommand::~MGClusterCommand(){}
200 //**********************************************************************************************************************
201 int MGClusterCommand::execute(){
202         try {
203                 
204                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
205                 
206                 //read names file
207                 if (namefile != "") {
208                         nameMap = new NameAssignment(namefile);
209                         nameMap->readMap();
210                 }else{ nameMap= new NameAssignment(); }
211                 
212                 string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
213                 string tag = "";
214                 time_t start;
215                 float previousDist = 0.00000;
216                 float rndPreviousDist = 0.00000;
217                 
218                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
219                 //must remember to delete those objects here since readBlast does not
220                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
221                 read->read(nameMap);
222                 
223                 list = new ListVector(nameMap->getListVector());
224                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
225                 
226                 if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
227                 
228                 start = time(NULL);
229                 oldList = *list;
230                 map<string, int> Seq2Bin;
231                 map<string, int> oldSeq2Bin;
232                 
233                 if (method == "furthest")               { tag = "fn";  }
234                 else if (method == "nearest")   { tag = "nn";  }
235                 else                                                    { tag = "an";  }
236                 
237                 //open output files
238                 m->openOutputFile(fileroot+ tag + ".list",  listFile);
239                 m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
240                 m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
241                 
242                 if (m->control_pressed) { 
243                         delete nameMap; delete read; delete list; delete rabund; 
244                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
245                         outputTypes.clear();
246                         return 0; 
247                 }
248                 
249                 if (!hclusterWanted) {
250                         //get distmatrix and overlap
251                         SparseMatrix* distMatrix = read->getDistMatrix();
252                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
253                         delete read;
254                 
255                         //create cluster
256                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
257                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
258                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
259                         cluster->setMapWanted(true);
260                         Seq2Bin = cluster->getSeqtoBin();
261                         oldSeq2Bin = Seq2Bin;
262                         
263                         if (m->control_pressed) { 
264                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
265                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
266                                 outputTypes.clear();
267                                 return 0; 
268                         }
269                 
270                         //cluster using cluster classes
271                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
272                                 
273                                 cluster->update(cutoff);
274                                 
275                                 if (m->control_pressed) { 
276                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
277                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
278                                         outputTypes.clear();
279                                         return 0; 
280                                 }
281                                 
282                                 float dist = distMatrix->getSmallDist();
283                                 float rndDist;
284                                 if (hard) {
285                                         rndDist = m->ceilDist(dist, precision); 
286                                 }else{
287                                         rndDist = m->roundDist(dist, precision); 
288                                 }
289                                 
290                                 if(previousDist <= 0.0000 && dist != previousDist){
291                                         oldList.setLabel("unique");
292                                         printData(&oldList);
293                                 }
294                                 else if(rndDist != rndPreviousDist){
295                                         if (merge) {
296                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
297                                                 
298                                                 if (m->control_pressed) { 
299                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
300                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
301                                                         outputTypes.clear();
302                                                         return 0; 
303                                                 }
304                                                 
305                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
306                                                 printData(temp);
307                                                 delete temp;
308                                         }else{
309                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
310                                                 printData(&oldList);
311                                         }
312                                 }
313         
314                                 previousDist = dist;
315                                 rndPreviousDist = rndDist;
316                                 oldList = *list;
317                                 Seq2Bin = cluster->getSeqtoBin();
318                                 oldSeq2Bin = Seq2Bin;
319                         }
320                         
321                         if(previousDist <= 0.0000){
322                                 oldList.setLabel("unique");
323                                 printData(&oldList);
324                         }
325                         else if(rndPreviousDist<cutoff){
326                                 if (merge) {
327                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
328                                         
329                                         if (m->control_pressed) { 
330                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
331                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
332                                                         outputTypes.clear();
333                                                         return 0; 
334                                         }
335                                         
336                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
337                                         printData(temp);
338                                         delete temp;
339                                 }else{
340                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
341                                         printData(&oldList);
342                                 }
343                         }
344                         
345                         //free memory
346                         overlapMatrix.clear();
347                         delete distMatrix;
348                         delete cluster;
349                         
350                 }else { //use hcluster to cluster
351                         //get distmatrix and overlap
352                         overlapFile = read->getOverlapFile();
353                         distFile = read->getDistFile(); 
354                         delete read;
355                 
356                         //sort the distance and overlap files
357                         sortHclusterFiles(distFile, overlapFile);
358                         
359                         if (m->control_pressed) { 
360                                 delete nameMap;  delete list; delete rabund; 
361                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
362                                 outputTypes.clear();
363                                 return 0; 
364                         }
365                 
366                         //create cluster
367                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
368                         hcluster->setMapWanted(true);
369                         Seq2Bin = cluster->getSeqtoBin();
370                         oldSeq2Bin = Seq2Bin;
371                         
372                         vector<seqDist> seqs; seqs.resize(1); // to start loop
373                         //ifstream inHcluster;
374                         //m->openInputFile(distFile, inHcluster);
375                         
376                         if (m->control_pressed) { 
377                                 delete nameMap;  delete list; delete rabund; delete hcluster;
378                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
379                                 outputTypes.clear();
380                                 return 0; 
381                         }
382
383                         while (seqs.size() != 0){
384                 
385                                 seqs = hcluster->getSeqs();
386                                 
387                                 if (m->control_pressed) { 
388                                         delete nameMap;  delete list; delete rabund; delete hcluster;
389                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
390                                         remove(distFile.c_str());
391                                         remove(overlapFile.c_str());
392                                         outputTypes.clear();
393                                         return 0; 
394                                 }
395                                 
396                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
397                                         
398                                         if (seqs[i].seq1 != seqs[i].seq2) {
399                 
400                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
401                                                 
402                                                 if (m->control_pressed) { 
403                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
404                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
405                                                         remove(distFile.c_str());
406                                                         remove(overlapFile.c_str());
407                                                         outputTypes.clear();
408                                                         return 0; 
409                                                 }
410         
411                                                 float rndDist;
412                                                 if (hard) {
413                                                         rndDist = m->ceilDist(seqs[i].dist, precision); 
414                                                 }else{
415                                                         rndDist = m->roundDist(seqs[i].dist, precision); 
416                                                 }
417                                                                                                 
418                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
419                                                         oldList.setLabel("unique");
420                                                         printData(&oldList);
421                                                 }
422                                                 else if((rndDist != rndPreviousDist)){
423                                                         if (merge) {
424                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
425                                                                 
426                                                                 if (m->control_pressed) { 
427                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
428                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
429                                                                         remove(distFile.c_str());
430                                                                         remove(overlapFile.c_str());
431                                                                         outputTypes.clear();
432                                                                         return 0; 
433                                                                 }
434
435                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
436                                                                 printData(temp);
437                                                                 delete temp;
438                                                         }else{
439                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
440                                                                 printData(&oldList);
441                                                         }
442                                                 }
443                                                 
444                                                 previousDist = seqs[i].dist;
445                                                 rndPreviousDist = rndDist;
446                                                 oldList = *list;
447                                                 Seq2Bin = cluster->getSeqtoBin();
448                                                 oldSeq2Bin = Seq2Bin;
449                                         }
450                                 }
451                         }
452                         //inHcluster.close();
453                         
454                         if(previousDist <= 0.0000){
455                                 oldList.setLabel("unique");
456                                 printData(&oldList);
457                         }
458                         else if(rndPreviousDist<cutoff){
459                                 if (merge) {
460                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
461                                         
462                                         if (m->control_pressed) { 
463                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
464                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
465                                                         remove(distFile.c_str());
466                                                         remove(overlapFile.c_str());
467                                                         outputTypes.clear();
468                                                         return 0; 
469                                         }
470                                         
471                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
472                                         printData(temp);
473                                         delete temp;
474                                 }else{
475                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
476                                         printData(&oldList);
477                                 }
478                         }
479                         
480                         delete hcluster;
481                         remove(distFile.c_str());
482                         remove(overlapFile.c_str());
483                 }
484                 
485                 delete list; 
486                 delete rabund;
487                 listFile.close();
488                 sabundFile.close();
489                 rabundFile.close();
490         
491                 globaldata->setListFile(fileroot+ tag + ".list");
492                 globaldata->setFormat("list");
493                 
494                 if (m->control_pressed) { 
495                         delete nameMap; 
496                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
497                         globaldata->setListFile("");
498                         globaldata->setFormat("");
499                         outputTypes.clear();
500                         return 0; 
501                 }
502                 
503                 m->mothurOutEndLine();
504                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
505                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
506                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
507                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
508                 m->mothurOutEndLine();
509                 
510                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
511                         
512                 return 0;
513         }
514         catch(exception& e) {
515                 m->errorOut(e, "MGClusterCommand", "execute");
516                 exit(1);
517         }
518 }
519 //**********************************************************************************************************************
520 void MGClusterCommand::printData(ListVector* mergedList){
521         try {
522                 mergedList->print(listFile);
523                 mergedList->getRAbundVector().print(rabundFile);
524                 
525                 SAbundVector sabund = mergedList->getSAbundVector();
526
527                 sabund.print(cout);
528                 sabund.print(sabundFile);
529         }
530         catch(exception& e) {
531                 m->errorOut(e, "MGClusterCommand", "printData");
532                 exit(1);
533         }
534 }
535 //**********************************************************************************************************************
536 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
537 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
538 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
539         try {
540                 //create new listvector so you don't overwrite the clustering
541                 ListVector* newList = new ListVector(oldList);
542
543                 bool done = false;
544                 ifstream inOverlap;
545                 int count = 0;
546                 
547                 if (hclusterWanted) {  
548                         m->openInputFile(overlapFile, inOverlap);  
549                         if (inOverlap.eof()) {  done = true;  }
550                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
551                 
552                 while (!done) {
553                         if (m->control_pressed) { 
554                                 if (hclusterWanted) {   inOverlap.close();  }           
555                                 return newList;
556                         }
557                         
558                         //get next overlap
559                         seqDist overlapNode;
560                         if (!hclusterWanted) {  
561                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
562                                         overlapNode = overlapMatrix[count];
563                                         count++;
564                                 }else { break; }
565                         }else { 
566                                 if (!inOverlap.eof()) {
567                                         string firstName, secondName;
568                                         float overlapDistance;
569                                         inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
570                                         
571                                         //commented out because we check this in readblast already
572                                         //map<string,int>::iterator itA = nameMap->find(firstName);
573                                         //map<string,int>::iterator itB = nameMap->find(secondName);
574                                         //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
575                                         //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
576                                         
577                                         //overlapNode.seq1 = itA->second;
578                                         //overlapNode.seq2 = itB->second;
579                                         overlapNode.seq1 = nameMap->get(firstName);
580                                         overlapNode.seq2 = nameMap->get(secondName);
581                                         overlapNode.dist = overlapDistance;
582                                 }else { inOverlap.close(); break; }
583                         } 
584                 
585                         if (overlapNode.dist < dist) {
586                                 //get names of seqs that overlap
587                                 string name1 = nameMap->get(overlapNode.seq1);
588                                 string name2 = nameMap->get(overlapNode.seq2);
589                         
590                                 //use binInfo to find out if they are already in the same bin
591                                 //map<string, int>::iterator itBin1 = binInfo.find(name1);
592                                 //map<string, int>::iterator itBin2 = binInfo.find(name2);
593                                 
594                                 //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
595                                 //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
596
597                                 //int binKeep = itBin1->second;
598                                 //int binRemove = itBin2->second;
599                                 
600                                 int binKeep = binInfo[name1];
601                                 int binRemove = binInfo[name2];
602                         
603                                 //if not merge bins and update binInfo
604                                 if(binKeep != binRemove) {
605                 
606                                         //save names in old bin
607                                         string names = newList->get(binRemove);
608                 
609                                         //merge bins into name1s bin
610                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
611                                         newList->set(binRemove, "");    
612                                         
613                                         //update binInfo
614                                         while (names.find_first_of(',') != -1) { 
615                                                 //get name from bin
616                                                 string name = names.substr(0,names.find_first_of(','));
617                                                 //save name and bin number
618                                                 binInfo[name] = binKeep;
619                                                 names = names.substr(names.find_first_of(',')+1, names.length());
620                                         }
621                                         
622                                         //get last name
623                                         binInfo[names] = binKeep;
624                                 }
625                                 
626                         }else { done = true; }
627                 }
628                 
629                 //return listvector
630                 return newList;
631                                 
632         }
633         catch(exception& e) {
634                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
635                 exit(1);
636         }
637 }
638 //**********************************************************************************************************************
639 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
640         try {
641                 //sort distFile
642                 string sortedDistFile = m->sortFile(unsortedDist, outputDir);
643                 remove(unsortedDist.c_str());  //delete unsorted file
644                 distFile = sortedDistFile;
645                 
646                 //sort overlap file
647                 string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
648                 remove(unsortedOverlap.c_str());  //delete unsorted file
649                 overlapFile = sortedOverlapFile;
650         }
651         catch(exception& e) {
652                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
653                 exit(1);
654         }
655 }
656
657 //**********************************************************************************************************************
658
659
660
661
662