]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added groups option to get.oturep command
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 MGClusterCommand::MGClusterCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string,string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("blast");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
48                                 }
49                                 
50                                 it = parameters.find("name");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
56                                 }
57                         }
58
59                         
60                         //check for required parameters
61                         blastfile = validParameter.validFile(parameters, "blast", true);
62                         if (blastfile == "not open") { abort = true; }  
63                         else if (blastfile == "not found") { blastfile = ""; }
64                         
65                         //if the user changes the output directory command factory will send this info to us in the output parameter 
66                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
67                                 outputDir = ""; 
68                                 outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it  
69                         }
70                         
71                         namefile = validParameter.validFile(parameters, "name", true);
72                         if (namefile == "not open") { abort = true; }   
73                         else if (namefile == "not found") { namefile = ""; }
74                         
75                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
76                         
77                         //check for optional parameter and set defaults
78                         string temp;
79                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
80                         precisionLength = temp.length();
81                         convert(temp, precision); 
82                         
83                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
84                         convert(temp, cutoff); 
85                         cutoff += (5 / (precision * 10.0));
86                         
87                         method = validParameter.validFile(parameters, "method", false);
88                         if (method == "not found") { method = "furthest"; }
89                         
90                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
91                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
92
93                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
94                         convert(temp, length); 
95                         
96                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
97                         convert(temp, penalty); 
98                         
99                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
100                         minWanted = isTrue(temp); 
101                         
102                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
103                         merge = isTrue(temp); 
104                         
105                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
106                         hclusterWanted = isTrue(temp); 
107                         
108                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
109                         hard = isTrue(temp);
110                 }
111
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
115                 exit(1);
116         }
117 }
118 //**********************************************************************************************************************
119
120 void MGClusterCommand::help(){
121         try {
122                 m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
123                 m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
124                 m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
125                 m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
126                 m->mothurOut("The precision parameter's default value is 100. \n");
127                 m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
128                 m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
129                 m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
130                 m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
131                 m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
132                 m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
133                 m->mothurOut("The mgcluster command should be in the following format: \n");
134                 m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
135                 m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "MGClusterCommand", "help");
139                 exit(1);
140         }
141 }
142 //**********************************************************************************************************************
143 MGClusterCommand::~MGClusterCommand(){}
144 //**********************************************************************************************************************
145 int MGClusterCommand::execute(){
146         try {
147                 
148                 if (abort == true) {    return 0;       }
149                 
150                 //read names file
151                 if (namefile != "") {
152                         nameMap = new NameAssignment(namefile);
153                         nameMap->readMap();
154                 }else{ nameMap= new NameAssignment(); }
155                 
156                 string fileroot = outputDir + getRootName(getSimpleName(blastfile));
157                 string tag = "";
158                 time_t start;
159                 float previousDist = 0.00000;
160                 float rndPreviousDist = 0.00000;
161                 
162                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
163                 //must remember to delete those objects here since readBlast does not
164                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
165                 read->read(nameMap);
166                 
167                 list = new ListVector(nameMap->getListVector());
168                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
169                 
170                 if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
171                 
172                 start = time(NULL);
173                 oldList = *list;
174                 map<string, int> Seq2Bin;
175                 map<string, int> oldSeq2Bin;
176                 
177                 if (method == "furthest")               { tag = "fn";  }
178                 else if (method == "nearest")   { tag = "nn";  }
179                 else                                                    { tag = "an";  }
180                 
181                 //open output files
182                 openOutputFile(fileroot+ tag + ".list",  listFile);
183                 openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
184                 openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
185                 
186                 if (m->control_pressed) { 
187                         delete nameMap; delete read; delete list; delete rabund; 
188                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
189                         return 0; 
190                 }
191                 
192                 if (!hclusterWanted) {
193                         //get distmatrix and overlap
194                         SparseMatrix* distMatrix = read->getDistMatrix();
195                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
196                         delete read;
197                 
198                         //create cluster
199                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
200                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
201                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
202                         cluster->setMapWanted(true);
203                         Seq2Bin = cluster->getSeqtoBin();
204                         oldSeq2Bin = Seq2Bin;
205                         
206                         if (m->control_pressed) { 
207                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
208                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
209                                 return 0; 
210                         }
211                 
212                         //cluster using cluster classes
213                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
214                                 
215                                 cluster->update(cutoff);
216                                 
217                                 if (m->control_pressed) { 
218                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
219                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
220                                         return 0; 
221                                 }
222                                 
223                                 float dist = distMatrix->getSmallDist();
224                                 float rndDist;
225                                 if (hard) {
226                                         rndDist = ceilDist(dist, precision); 
227                                 }else{
228                                         rndDist = roundDist(dist, precision); 
229                                 }
230                                 
231                                 if(previousDist <= 0.0000 && dist != previousDist){
232                                         oldList.setLabel("unique");
233                                         printData(&oldList);
234                                 }
235                                 else if(rndDist != rndPreviousDist){
236                                         if (merge) {
237                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
238                                                 
239                                                 if (m->control_pressed) { 
240                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
241                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
242                                                         return 0; 
243                                                 }
244                                                 
245                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
246                                                 printData(temp);
247                                                 delete temp;
248                                         }else{
249                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
250                                                 printData(&oldList);
251                                         }
252                                 }
253         
254                                 previousDist = dist;
255                                 rndPreviousDist = rndDist;
256                                 oldList = *list;
257                                 Seq2Bin = cluster->getSeqtoBin();
258                                 oldSeq2Bin = Seq2Bin;
259                         }
260                         
261                         if(previousDist <= 0.0000){
262                                 oldList.setLabel("unique");
263                                 printData(&oldList);
264                         }
265                         else if(rndPreviousDist<cutoff){
266                                 if (merge) {
267                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
268                                         
269                                         if (m->control_pressed) { 
270                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
271                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
272                                                         return 0; 
273                                         }
274                                         
275                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
276                                         printData(temp);
277                                         delete temp;
278                                 }else{
279                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
280                                         printData(&oldList);
281                                 }
282                         }
283                         
284                         //free memory
285                         overlapMatrix.clear();
286                         delete distMatrix;
287                         delete cluster;
288                         
289                 }else { //use hcluster to cluster
290                         //get distmatrix and overlap
291                         overlapFile = read->getOverlapFile();
292                         distFile = read->getDistFile(); 
293                         delete read;
294                 
295                         //sort the distance and overlap files
296                         sortHclusterFiles(distFile, overlapFile);
297                         
298                         if (m->control_pressed) { 
299                                 delete nameMap;  delete list; delete rabund; 
300                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
301                                 return 0; 
302                         }
303                 
304                         //create cluster
305                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
306                         hcluster->setMapWanted(true);
307                         Seq2Bin = cluster->getSeqtoBin();
308                         oldSeq2Bin = Seq2Bin;
309                         
310                         vector<seqDist> seqs; seqs.resize(1); // to start loop
311                         //ifstream inHcluster;
312                         //openInputFile(distFile, inHcluster);
313                         
314                         if (m->control_pressed) { 
315                                 delete nameMap;  delete list; delete rabund; delete hcluster;
316                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
317                                 return 0; 
318                         }
319
320                         while (seqs.size() != 0){
321                 
322                                 seqs = hcluster->getSeqs();
323                                 
324                                 if (m->control_pressed) { 
325                                         delete nameMap;  delete list; delete rabund; delete hcluster;
326                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
327                                         remove(distFile.c_str());
328                                         remove(overlapFile.c_str());
329                                         return 0; 
330                                 }
331                                 
332                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
333                                         
334                                         if (seqs[i].seq1 != seqs[i].seq2) {
335                 
336                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
337                                                 
338                                                 if (m->control_pressed) { 
339                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
340                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
341                                                         remove(distFile.c_str());
342                                                         remove(overlapFile.c_str());
343                                                         return 0; 
344                                                 }
345         
346                                                 float rndDist;
347                                                 if (hard) {
348                                                         rndDist = ceilDist(seqs[i].dist, precision); 
349                                                 }else{
350                                                         rndDist = roundDist(seqs[i].dist, precision); 
351                                                 }
352                                                                                                 
353                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
354                                                         oldList.setLabel("unique");
355                                                         printData(&oldList);
356                                                 }
357                                                 else if((rndDist != rndPreviousDist)){
358                                                         if (merge) {
359                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
360                                                                 
361                                                                 if (m->control_pressed) { 
362                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
363                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
364                                                                         remove(distFile.c_str());
365                                                                         remove(overlapFile.c_str());
366                                                                         return 0; 
367                                                                 }
368
369                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
370                                                                 printData(temp);
371                                                                 delete temp;
372                                                         }else{
373                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
374                                                                 printData(&oldList);
375                                                         }
376                                                 }
377                                                 
378                                                 previousDist = seqs[i].dist;
379                                                 rndPreviousDist = rndDist;
380                                                 oldList = *list;
381                                                 Seq2Bin = cluster->getSeqtoBin();
382                                                 oldSeq2Bin = Seq2Bin;
383                                         }
384                                 }
385                         }
386                         //inHcluster.close();
387                         
388                         if(previousDist <= 0.0000){
389                                 oldList.setLabel("unique");
390                                 printData(&oldList);
391                         }
392                         else if(rndPreviousDist<cutoff){
393                                 if (merge) {
394                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
395                                         
396                                         if (m->control_pressed) { 
397                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
398                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
399                                                         remove(distFile.c_str());
400                                                         remove(overlapFile.c_str());
401                                                         return 0; 
402                                         }
403                                         
404                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
405                                         printData(temp);
406                                         delete temp;
407                                 }else{
408                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
409                                         printData(&oldList);
410                                 }
411                         }
412                         
413                         delete hcluster;
414                         remove(distFile.c_str());
415                         remove(overlapFile.c_str());
416                 }
417                 
418                 delete list; 
419                 delete rabund;
420                 listFile.close();
421                 sabundFile.close();
422                 rabundFile.close();
423         
424                 globaldata->setListFile(fileroot+ tag + ".list");
425                 globaldata->setFormat("list");
426                 
427                 if (m->control_pressed) { 
428                         delete nameMap; 
429                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
430                         globaldata->setListFile("");
431                         globaldata->setFormat("");
432                         return 0; 
433                 }
434                 
435                 m->mothurOutEndLine();
436                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
437                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   
438                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); 
439                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
440                 m->mothurOutEndLine();
441                 
442                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
443                         
444                 return 0;
445         }
446         catch(exception& e) {
447                 m->errorOut(e, "MGClusterCommand", "execute");
448                 exit(1);
449         }
450 }
451 //**********************************************************************************************************************
452 void MGClusterCommand::printData(ListVector* mergedList){
453         try {
454                 mergedList->print(listFile);
455                 mergedList->getRAbundVector().print(rabundFile);
456                 
457                 SAbundVector sabund = mergedList->getSAbundVector();
458
459                 sabund.print(cout);
460                 sabund.print(sabundFile);
461         }
462         catch(exception& e) {
463                 m->errorOut(e, "MGClusterCommand", "printData");
464                 exit(1);
465         }
466 }
467 //**********************************************************************************************************************
468 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
469 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
470 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
471         try {
472                 //create new listvector so you don't overwrite the clustering
473                 ListVector* newList = new ListVector(oldList);
474
475                 bool done = false;
476                 ifstream inOverlap;
477                 int count = 0;
478                 
479                 if (hclusterWanted) {  
480                         openInputFile(overlapFile, inOverlap);  
481                         if (inOverlap.eof()) {  done = true;  }
482                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
483                 
484                 while (!done) {
485                         if (m->control_pressed) { 
486                                 if (hclusterWanted) {   inOverlap.close();  }           
487                                 return newList;
488                         }
489                         
490                         //get next overlap
491                         seqDist overlapNode;
492                         if (!hclusterWanted) {  
493                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
494                                         overlapNode = overlapMatrix[count];
495                                         count++;
496                                 }else { break; }
497                         }else { 
498                                 if (!inOverlap.eof()) {
499                                         string firstName, secondName;
500                                         float overlapDistance;
501                                         inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
502                                         
503                                         map<string,int>::iterator itA = nameMap->find(firstName);
504                                         map<string,int>::iterator itB = nameMap->find(secondName);
505                                         if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
506                                         if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
507                                         
508                                         overlapNode.seq1 = itA->second;
509                                         overlapNode.seq2 = itB->second;
510                                         overlapNode.dist = overlapDistance;
511                                 }else { inOverlap.close(); break; }
512                         } 
513                 
514                         if (overlapNode.dist < dist) {
515                                 //get names of seqs that overlap
516                                 string name1 = nameMap->get(overlapNode.seq1);
517                                 string name2 = nameMap->get(overlapNode.seq2);
518                         
519                                 //use binInfo to find out if they are already in the same bin
520                                 map<string, int>::iterator itBin1 = binInfo.find(name1);
521                                 map<string, int>::iterator itBin2 = binInfo.find(name2);
522                                 
523                                 if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
524                                 if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
525
526                                 int binKeep = itBin1->second;
527                                 int binRemove = itBin2->second;
528                         
529                                 //if not merge bins and update binInfo
530                                 if(binKeep != binRemove) {
531                 
532                                         //save names in old bin
533                                         string names = newList->get(binRemove);
534                 
535                                         //merge bins into name1s bin
536                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
537                                         newList->set(binRemove, "");    
538                                         
539                                         //update binInfo
540                                         while (names.find_first_of(',') != -1) { 
541                                                 //get name from bin
542                                                 string name = names.substr(0,names.find_first_of(','));
543                                                 //save name and bin number
544                                                 binInfo[name] = binKeep;
545                                                 names = names.substr(names.find_first_of(',')+1, names.length());
546                                         }
547                                         
548                                         //get last name
549                                         binInfo[names] = binKeep;
550                                 }
551                                 
552                         }else { done = true; }
553                 }
554                 
555                 //return listvector
556                 return newList;
557                                 
558         }
559         catch(exception& e) {
560                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
561                 exit(1);
562         }
563 }
564 //**********************************************************************************************************************
565 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
566         try {
567                 //sort distFile
568                 string sortedDistFile = sortFile(unsortedDist, outputDir);
569                 remove(unsortedDist.c_str());  //delete unsorted file
570                 distFile = sortedDistFile;
571                 
572                 //sort overlap file
573                 string sortedOverlapFile = sortFile(unsortedOverlap, outputDir);
574                 remove(unsortedOverlap.c_str());  //delete unsorted file
575                 overlapFile = sortedOverlapFile;
576         }
577         catch(exception& e) {
578                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
579                 exit(1);
580         }
581 }
582
583 //**********************************************************************************************************************
584
585
586
587
588