]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
fixed cluster.split with average method
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 MGClusterCommand::MGClusterCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string,string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("blast");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
48                                 }
49                                 
50                                 it = parameters.find("name");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
56                                 }
57                         }
58
59                         
60                         //check for required parameters
61                         blastfile = validParameter.validFile(parameters, "blast", true);
62                         if (blastfile == "not open") { abort = true; }  
63                         else if (blastfile == "not found") { blastfile = ""; }
64                         
65                         //if the user changes the output directory command factory will send this info to us in the output parameter 
66                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
67                                 outputDir = ""; 
68                                 outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it  
69                         }
70                         
71                         namefile = validParameter.validFile(parameters, "name", true);
72                         if (namefile == "not open") { abort = true; }   
73                         else if (namefile == "not found") { namefile = ""; }
74                         
75                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
76                         
77                         //check for optional parameter and set defaults
78                         string temp;
79                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
80                         precisionLength = temp.length();
81                         convert(temp, precision); 
82                         
83                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
84                         convert(temp, cutoff); 
85                         cutoff += (5 / (precision * 10.0));
86                         
87                         method = validParameter.validFile(parameters, "method", false);
88                         if (method == "not found") { method = "furthest"; }
89                         
90                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
91                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
92
93                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
94                         convert(temp, length); 
95                         
96                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
97                         convert(temp, penalty); 
98                         
99                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
100                         minWanted = isTrue(temp); 
101                         
102                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
103                         merge = isTrue(temp); 
104                         
105                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
106                         hclusterWanted = isTrue(temp); 
107                         
108                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
109                         hard = isTrue(temp);
110                 }
111
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
115                 exit(1);
116         }
117 }
118 //**********************************************************************************************************************
119
120 void MGClusterCommand::help(){
121         try {
122                 m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
123                 m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
124                 m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
125                 m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
126                 m->mothurOut("The precision parameter's default value is 100. \n");
127                 m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
128                 m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
129                 m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
130                 m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
131                 m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
132                 m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
133                 m->mothurOut("The mgcluster command should be in the following format: \n");
134                 m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
135                 m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "MGClusterCommand", "help");
139                 exit(1);
140         }
141 }
142 //**********************************************************************************************************************
143 MGClusterCommand::~MGClusterCommand(){}
144 //**********************************************************************************************************************
145 int MGClusterCommand::execute(){
146         try {
147                 
148                 if (abort == true) {    return 0;       }
149                 
150                 //read names file
151                 if (namefile != "") {
152                         nameMap = new NameAssignment(namefile);
153                         nameMap->readMap();
154                 }else{ nameMap= new NameAssignment(); }
155                 
156                 string fileroot = outputDir + getRootName(getSimpleName(blastfile));
157                 string tag = "";
158                 time_t start;
159                 float previousDist = 0.00000;
160                 float rndPreviousDist = 0.00000;
161                 
162                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
163                 //must remember to delete those objects here since readBlast does not
164                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
165                 read->read(nameMap);
166                 
167                 list = new ListVector(nameMap->getListVector());
168                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
169                 
170                 if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
171                 
172                 start = time(NULL);
173                 oldList = *list;
174                 map<string, int> Seq2Bin;
175                 map<string, int> oldSeq2Bin;
176                 
177                 if (method == "furthest")               { tag = "fn";  }
178                 else if (method == "nearest")   { tag = "nn";  }
179                 else                                                    { tag = "an";  }
180                 
181                 //open output files
182                 openOutputFile(fileroot+ tag + ".list",  listFile);
183                 openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
184                 openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
185                 
186                 if (m->control_pressed) { 
187                         delete nameMap; delete read; delete list; delete rabund; 
188                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
189                         return 0; 
190                 }
191                 
192                 if (!hclusterWanted) {
193                         //get distmatrix and overlap
194                         SparseMatrix* distMatrix = read->getDistMatrix();
195                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
196                         delete read;
197                 
198                         //create cluster
199                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
200                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
201                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
202                         cluster->setMapWanted(true);
203                         Seq2Bin = cluster->getSeqtoBin();
204                         oldSeq2Bin = Seq2Bin;
205                         
206                         if (m->control_pressed) { 
207                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
208                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
209                                 return 0; 
210                         }
211                         
212                         //cluster using cluster classes
213                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
214                                 
215                                 cluster->update(cutoff);
216                                 
217                                 if (m->control_pressed) { 
218                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
219                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
220                                         return 0; 
221                                 }
222                                 
223                                 float dist = distMatrix->getSmallDist();
224                                 float rndDist;
225                                 if (hard) {
226                                         rndDist = ceilDist(dist, precision); 
227                                 }else{
228                                         rndDist = roundDist(dist, precision); 
229                                 }
230
231                                 
232                                 if(previousDist <= 0.0000 && dist != previousDist){
233                                         oldList.setLabel("unique");
234                                         printData(&oldList);
235                                 }
236                                 else if(rndDist != rndPreviousDist){
237                                         if (merge) {
238                                                 Seq2Bin = cluster->getSeqtoBin();
239                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
240                                                 
241                                                 if (m->control_pressed) { 
242                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
243                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
244                                                         return 0; 
245                                                 }
246                                                 
247                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
248                                                 printData(temp);
249                                                 delete temp;
250                                         }else{
251                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
252                                                 printData(&oldList);
253                                         }
254                                 }
255                                 
256                                 previousDist = dist;
257         cout << "prev distance = " << previousDist << " dist = " << dist << endl;
258                                 rndPreviousDist = rndDist;
259                                 oldList = *list;
260                                 oldSeq2Bin = Seq2Bin;
261                         }
262                         
263                         if(previousDist <= 0.0000){
264                                 oldList.setLabel("unique");
265                                 printData(&oldList);
266                         }
267                         else if(rndPreviousDist<cutoff){
268                                 if (merge) {
269                                         Seq2Bin = cluster->getSeqtoBin();
270                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
271                                         
272                                         if (m->control_pressed) { 
273                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
274                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
275                                                         return 0; 
276                                         }
277                                         
278                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
279                                         printData(temp);
280                                         delete temp;
281                                 }else{
282                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
283                                         printData(&oldList);
284                                 }
285                         }
286                         
287                         //free memory
288                         overlapMatrix.clear();
289                         delete distMatrix;
290                         delete cluster;
291                         
292                 }else { //use hcluster to cluster
293                         //get distmatrix and overlap
294                         overlapFile = read->getOverlapFile();
295                         distFile = read->getDistFile(); 
296                         delete read;
297                 
298                         //sort the distance and overlap files
299                         sortHclusterFiles(distFile, overlapFile);
300                         
301                         if (m->control_pressed) { 
302                                 delete nameMap;  delete list; delete rabund; 
303                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
304                                 return 0; 
305                         }
306                 
307                         //create cluster
308                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
309                         hcluster->setMapWanted(true);
310                         Seq2Bin = cluster->getSeqtoBin();
311                         oldSeq2Bin = Seq2Bin;
312                         
313                         vector<seqDist> seqs; seqs.resize(1); // to start loop
314                         //ifstream inHcluster;
315                         //openInputFile(distFile, inHcluster);
316                         
317                         if (m->control_pressed) { 
318                                 delete nameMap;  delete list; delete rabund; delete hcluster;
319                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
320                                 return 0; 
321                         }
322
323                         while (seqs.size() != 0){
324                 
325                                 seqs = hcluster->getSeqs();
326                                 
327                                 if (m->control_pressed) { 
328                                         delete nameMap;  delete list; delete rabund; delete hcluster;
329                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
330                                         remove(distFile.c_str());
331                                         remove(overlapFile.c_str());
332                                         return 0; 
333                                 }
334                                 
335                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
336                                         
337                                         if (seqs[i].seq1 != seqs[i].seq2) {
338                 
339                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
340                                                 
341                                                 if (m->control_pressed) { 
342                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
343                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
344                                                         remove(distFile.c_str());
345                                                         remove(overlapFile.c_str());
346                                                         return 0; 
347                                                 }
348         
349                                                 float rndDist;
350                                                 if (hard) {
351                                                         rndDist = ceilDist(seqs[i].dist, precision); 
352                                                 }else{
353                                                         rndDist = roundDist(seqs[i].dist, precision); 
354                                                 }
355                                                                                                 
356                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
357                                                         oldList.setLabel("unique");
358                                                         printData(&oldList);
359                                                 }
360                                                 else if((rndDist != rndPreviousDist)){
361                                                         if (merge) {
362                                                                 Seq2Bin = hcluster->getSeqtoBin();
363                                                                 ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
364                                                                 
365                                                                 if (m->control_pressed) { 
366                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
367                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
368                                                                         remove(distFile.c_str());
369                                                                         remove(overlapFile.c_str());
370                                                                         return 0; 
371                                                                 }
372
373                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
374                                                                 printData(temp);
375                                                                 delete temp;
376                                                         }else{
377                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
378                                                                 printData(&oldList);
379                                                         }
380                                                 }
381                                                 
382                                                 previousDist = seqs[i].dist;
383                                                 rndPreviousDist = rndDist;
384                                                 oldList = *list;
385                                                 oldSeq2Bin = Seq2Bin;
386                                         }
387                                 }
388                         }
389                         //inHcluster.close();
390                         
391                         if(previousDist <= 0.0000){
392                                 oldList.setLabel("unique");
393                                 printData(&oldList);
394                         }
395                         else if(rndPreviousDist<cutoff){
396                                 if (merge) {
397                                         Seq2Bin = hcluster->getSeqtoBin();
398                                         ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
399                                         
400                                         if (m->control_pressed) { 
401                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
402                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
403                                                         remove(distFile.c_str());
404                                                         remove(overlapFile.c_str());
405                                                         return 0; 
406                                         }
407                                         
408                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
409                                         printData(temp);
410                                         delete temp;
411                                 }else{
412                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
413                                         printData(&oldList);
414                                 }
415                         }
416                         
417                         delete hcluster;
418                         //remove(distFile.c_str());
419                         //remove(overlapFile.c_str());
420                 }
421                 
422                 delete list; 
423                 delete rabund;
424                 listFile.close();
425                 sabundFile.close();
426                 rabundFile.close();
427         
428                 globaldata->setListFile(fileroot+ tag + ".list");
429                 globaldata->setFormat("list");
430                 
431                 if (m->control_pressed) { 
432                         delete nameMap; 
433                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
434                         globaldata->setListFile("");
435                         globaldata->setFormat("");
436                         return 0; 
437                 }
438                 
439                 m->mothurOutEndLine();
440                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
441                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   
442                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); 
443                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
444                 m->mothurOutEndLine();
445                 
446                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
447                         
448                 return 0;
449         }
450         catch(exception& e) {
451                 m->errorOut(e, "MGClusterCommand", "execute");
452                 exit(1);
453         }
454 }
455 //**********************************************************************************************************************
456 void MGClusterCommand::printData(ListVector* mergedList){
457         try {
458                 mergedList->print(listFile);
459                 mergedList->getRAbundVector().print(rabundFile);
460                 
461                 SAbundVector sabund = mergedList->getSAbundVector();
462
463                 sabund.print(cout);
464                 sabund.print(sabundFile);
465         }
466         catch(exception& e) {
467                 m->errorOut(e, "MGClusterCommand", "printData");
468                 exit(1);
469         }
470 }
471 //**********************************************************************************************************************
472 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not effect the datastructures
473 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influenece than the distance data.
474 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
475         try {
476                 //create new listvector so you don't overwrite the clustering
477                 ListVector* newList = new ListVector(oldList);
478                 bool done = false;
479                 ifstream inOverlap;
480                 int count = 0;
481                 
482                 if (hclusterWanted) {  
483                         openInputFile(overlapFile, inOverlap);  
484                         if (inOverlap.eof()) {  done = true;  }
485                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
486                 
487                 while (!done) {
488                         if (m->control_pressed) { 
489                                 if (hclusterWanted) {   inOverlap.close();  }           
490                                 return newList;
491                         }
492                         
493                         //get next overlap
494                         seqDist overlapNode;
495                         if (!hclusterWanted) {  
496                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
497                                         overlapNode = overlapMatrix[count];
498                                         count++;
499                                 }else { break; }
500                         }else { 
501                                 if (!inOverlap.eof()) {
502                                         string firstName, secondName;
503                                         float overlapDistance;
504                                         inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
505                                         
506                                         map<string,int>::iterator itA = nameMap->find(firstName);
507                                         map<string,int>::iterator itB = nameMap->find(secondName);
508                                         if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
509                                         if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
510                                         
511                                         overlapNode.seq1 = itA->second;
512                                         overlapNode.seq2 = itB->second;
513                                         overlapNode.dist = overlapDistance;
514                                 }else { inOverlap.close(); break; }
515                         } 
516                 
517                         if (overlapNode.dist < dist) {
518                                 //get names of seqs that overlap
519                                 string name1 = nameMap->get(overlapNode.seq1);
520                                 string name2 = nameMap->get(overlapNode.seq2);
521                         
522                                 //use binInfo to find out if they are already in the same bin
523                                 map<string, int>::iterator itBin1 = binInfo.find(name1);
524                                 map<string, int>::iterator itBin2 = binInfo.find(name2);
525                                 
526                                 if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
527                                 if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
528
529                                 int binKeep = itBin1->second;
530                                 int binRemove = itBin2->second;
531                                 
532                                 //if not merge bins and update binInfo
533                                 if(binKeep != binRemove) {
534                 
535                                         //save names in old bin
536                                         string names = newList->get(binRemove);
537                                         
538                                         //merge bins into name1s bin
539                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
540                                         newList->set(binRemove, "");    
541                                         
542                                         //update binInfo
543                                         while (names.find_first_of(',') != -1) { 
544                                                 //get name from bin
545                                                 string name = names.substr(0,names.find_first_of(','));
546                                                 //save name and bin number
547                                                 binInfo[name] = binKeep;
548                                                 names = names.substr(names.find_first_of(',')+1, names.length());
549                                         }
550                                         
551                                         //get last name
552                                         binInfo[names] = binKeep;
553                                 }
554                                 
555                         }else { done = true; }
556                 }
557                 
558                 //return listvector
559                 return newList;
560                                 
561         }
562         catch(exception& e) {
563                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
564                 exit(1);
565         }
566 }
567 //**********************************************************************************************************************
568 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
569         try {
570                 //sort distFile
571                 string sortedDistFile = sortFile(unsortedDist, outputDir);
572                 remove(unsortedDist.c_str());  //delete unsorted file
573                 distFile = sortedDistFile;
574                 
575                 //sort overlap file
576                 string sortedOverlapFile = sortFile(unsortedOverlap, outputDir);
577                 remove(unsortedOverlap.c_str());  //delete unsorted file
578                 overlapFile = sortedOverlapFile;
579         }
580         catch(exception& e) {
581                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
582                 exit(1);
583         }
584 }
585
586 //**********************************************************************************************************************
587
588
589
590
591