]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
changed hard parameter in cluster commands
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 MGClusterCommand::MGClusterCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string,string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("blast");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
48                                 }
49                                 
50                                 it = parameters.find("name");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
56                                 }
57                         }
58
59                         
60                         //check for required parameters
61                         blastfile = validParameter.validFile(parameters, "blast", true);
62                         if (blastfile == "not open") { abort = true; }  
63                         else if (blastfile == "not found") { blastfile = ""; }
64                         
65                         //if the user changes the output directory command factory will send this info to us in the output parameter 
66                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
67                                 outputDir = ""; 
68                                 outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it  
69                         }
70                         
71                         namefile = validParameter.validFile(parameters, "name", true);
72                         if (namefile == "not open") { abort = true; }   
73                         else if (namefile == "not found") { namefile = ""; }
74                         
75                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
76                         
77                         //check for optional parameter and set defaults
78                         string temp;
79                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
80                         precisionLength = temp.length();
81                         convert(temp, precision); 
82                         
83                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
84                         convert(temp, cutoff); 
85                         cutoff += (5 / (precision * 10.0));
86                         
87                         method = validParameter.validFile(parameters, "method", false);
88                         if (method == "not found") { method = "furthest"; }
89                         
90                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
91                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
92
93                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
94                         convert(temp, length); 
95                         
96                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
97                         convert(temp, penalty); 
98                         
99                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
100                         minWanted = isTrue(temp); 
101                         
102                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
103                         merge = isTrue(temp); 
104                         
105                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
106                         hclusterWanted = isTrue(temp); 
107                         
108                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
109                         hard = isTrue(temp);
110                 }
111
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
115                 exit(1);
116         }
117 }
118 //**********************************************************************************************************************
119
120 void MGClusterCommand::help(){
121         try {
122                 m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
123                 m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
124                 m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
125                 m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
126                 m->mothurOut("The precision parameter's default value is 100. \n");
127                 m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
128                 m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
129                 m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
130                 m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
131                 m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
132                 m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
133                 m->mothurOut("The mgcluster command should be in the following format: \n");
134                 m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
135                 m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "MGClusterCommand", "help");
139                 exit(1);
140         }
141 }
142 //**********************************************************************************************************************
143 MGClusterCommand::~MGClusterCommand(){}
144 //**********************************************************************************************************************
145 int MGClusterCommand::execute(){
146         try {
147                 
148                 if (abort == true) {    return 0;       }
149                 
150                 //read names file
151                 if (namefile != "") {
152                         nameMap = new NameAssignment(namefile);
153                         nameMap->readMap();
154                 }else{ nameMap= new NameAssignment(); }
155                 
156                 string fileroot = outputDir + getRootName(getSimpleName(blastfile));
157                 string tag = "";
158                 time_t start;
159                 float previousDist = 0.00000;
160                 float rndPreviousDist = 0.00000;
161                 
162                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
163                 //must remember to delete those objects here since readBlast does not
164                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
165                 read->read(nameMap);
166                 
167                 list = new ListVector(nameMap->getListVector());
168                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
169                 
170                 if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
171                 
172                 start = time(NULL);
173                 oldList = *list;
174                 
175                 if (method == "furthest")               { tag = "fn";  }
176                 else if (method == "nearest")   { tag = "nn";  }
177                 else                                                    { tag = "an";  }
178                 
179                 //open output files
180                 openOutputFile(fileroot+ tag + ".list",  listFile);
181                 openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
182                 openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
183                 
184                 if (m->control_pressed) { 
185                         delete nameMap; delete read; delete list; delete rabund; 
186                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
187                         return 0; 
188                 }
189                 
190                 if (!hclusterWanted) {
191                         //get distmatrix and overlap
192                         SparseMatrix* distMatrix = read->getDistMatrix();
193                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
194                         delete read;
195                 
196                         //create cluster
197                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
198                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
199                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
200                         cluster->setMapWanted(true);
201                         
202                         if (m->control_pressed) { 
203                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
204                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
205                                 return 0; 
206                         }
207                         
208                         //cluster using cluster classes
209                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
210                                 
211                                 cluster->update(cutoff);
212                                 
213                                 if (m->control_pressed) { 
214                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
215                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
216                                         return 0; 
217                                 }
218                                 
219                                 float dist = distMatrix->getSmallDist();
220                                 float rndDist;
221                                 if (hard) {
222                                         rndDist = ceilDist(dist, precision); 
223                                 }else{
224                                         rndDist = roundDist(dist, precision); 
225                                 }
226
227                                 
228                                 if(previousDist <= 0.0000 && dist != previousDist){
229                                         oldList.setLabel("unique");
230                                         printData(&oldList);
231                                 }
232                                 else if(rndDist != rndPreviousDist){
233                                         if (merge) {
234                                                 map<string, int> seq2Bin = cluster->getSeqtoBin();
235                                                 ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
236                                                 
237                                                 if (m->control_pressed) { 
238                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
239                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
240                                                         return 0; 
241                                                 }
242                                                 
243                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
244                                                 printData(temp);
245                                                 delete temp;
246                                         }else{
247                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
248                                                 printData(&oldList);
249                                         }
250                                 }
251                                 
252                                 previousDist = dist;
253                                 rndPreviousDist = rndDist;
254                                 oldList = *list;
255                         }
256                         
257                         if(previousDist <= 0.0000){
258                                 oldList.setLabel("unique");
259                                 printData(&oldList);
260                         }
261                         else if(rndPreviousDist<cutoff){
262                                 if (merge) {
263                                         map<string, int> seq2Bin = cluster->getSeqtoBin();
264                                         ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
265                                         
266                                         if (m->control_pressed) { 
267                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
268                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
269                                                         return 0; 
270                                         }
271                                         
272                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
273                                         printData(temp);
274                                         delete temp;
275                                 }else{
276                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
277                                         printData(&oldList);
278                                 }
279                         }
280                         
281                         //free memory
282                         overlapMatrix.clear();
283                         delete distMatrix;
284                         delete cluster;
285                         
286                 }else { //use hcluster to cluster
287                         //get distmatrix and overlap
288                         overlapFile = read->getOverlapFile();
289                         distFile = read->getDistFile(); 
290                         delete read;
291                 
292                         //sort the distance and overlap files
293                         sortHclusterFiles(distFile, overlapFile);
294                         
295                         if (m->control_pressed) { 
296                                 delete nameMap;  delete list; delete rabund; 
297                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
298                                 return 0; 
299                         }
300                 
301                         //create cluster
302                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
303                         hcluster->setMapWanted(true);
304                         
305                         vector<seqDist> seqs; seqs.resize(1); // to start loop
306                         //ifstream inHcluster;
307                         //openInputFile(distFile, inHcluster);
308                         
309                         if (m->control_pressed) { 
310                                 delete nameMap;  delete list; delete rabund; delete hcluster;
311                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
312                                 return 0; 
313                         }
314
315                         while (seqs.size() != 0){
316                 
317                                 seqs = hcluster->getSeqs();
318                                 
319                                 if (m->control_pressed) { 
320                                         delete nameMap;  delete list; delete rabund; delete hcluster;
321                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
322                                         remove(distFile.c_str());
323                                         remove(overlapFile.c_str());
324                                         return 0; 
325                                 }
326                                 
327                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
328                                         
329                                         if (seqs[i].seq1 != seqs[i].seq2) {
330                 
331                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
332                                                 
333                                                 if (m->control_pressed) { 
334                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
335                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
336                                                         remove(distFile.c_str());
337                                                         remove(overlapFile.c_str());
338                                                         return 0; 
339                                                 }
340         
341                                                 float rndDist;
342                                                 if (hard) {
343                                                         rndDist = ceilDist(seqs[i].dist, precision); 
344                                                 }else{
345                                                         rndDist = roundDist(seqs[i].dist, precision); 
346                                                 }
347                                                                                                 
348                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
349                                                         oldList.setLabel("unique");
350                                                         printData(&oldList);
351                                                 }
352                                                 else if((rndDist != rndPreviousDist)){
353                                                         if (merge) {
354                                                                 map<string, int> seq2Bin = hcluster->getSeqtoBin();
355                                                                 ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
356                                                                 
357                                                                 if (m->control_pressed) { 
358                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
359                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
360                                                                         remove(distFile.c_str());
361                                                                         remove(overlapFile.c_str());
362                                                                         return 0; 
363                                                                 }
364
365                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
366                                                                 printData(temp);
367                                                                 delete temp;
368                                                         }else{
369                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
370                                                                 printData(&oldList);
371                                                         }
372                                                 }
373                                                 
374                                                 previousDist = seqs[i].dist;
375                                                 rndPreviousDist = rndDist;
376                                                 oldList = *list;
377                                         }
378                                 }
379                         }
380                         //inHcluster.close();
381                         
382                         if(previousDist <= 0.0000){
383                                 oldList.setLabel("unique");
384                                 printData(&oldList);
385                         }
386                         else if(rndPreviousDist<cutoff){
387                                 if (merge) {
388                                         map<string, int> seq2Bin = hcluster->getSeqtoBin();
389                                         ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
390                                         
391                                         if (m->control_pressed) { 
392                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
393                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
394                                                         remove(distFile.c_str());
395                                                         remove(overlapFile.c_str());
396                                                         return 0; 
397                                         }
398                                         
399                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
400                                         printData(temp);
401                                         delete temp;
402                                 }else{
403                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
404                                         printData(&oldList);
405                                 }
406                         }
407                         
408                         delete hcluster;
409                         remove(distFile.c_str());
410                         remove(overlapFile.c_str());
411                 }
412                 
413                 delete list; 
414                 delete rabund;
415                 listFile.close();
416                 sabundFile.close();
417                 rabundFile.close();
418         
419                 globaldata->setListFile(fileroot+ tag + ".list");
420                 globaldata->setFormat("list");
421                 
422                 if (m->control_pressed) { 
423                         delete nameMap; 
424                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
425                         globaldata->setListFile("");
426                         globaldata->setFormat("");
427                         return 0; 
428                 }
429                 
430                 m->mothurOutEndLine();
431                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
432                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   
433                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); 
434                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
435                 m->mothurOutEndLine();
436                 
437                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
438                         
439                 return 0;
440         }
441         catch(exception& e) {
442                 m->errorOut(e, "MGClusterCommand", "execute");
443                 exit(1);
444         }
445 }
446 //**********************************************************************************************************************
447 void MGClusterCommand::printData(ListVector* mergedList){
448         try {
449                 mergedList->print(listFile);
450                 mergedList->getRAbundVector().print(rabundFile);
451                 
452                 SAbundVector sabund = mergedList->getSAbundVector();
453
454                 sabund.print(cout);
455                 sabund.print(sabundFile);
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "MGClusterCommand", "printData");
459                 exit(1);
460         }
461 }
462 //**********************************************************************************************************************
463 //this merging is just at the reporting level, after this info is printed to the file it is gone and does not affect the data structures
464 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influence than the distance data.
465 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
466         try {
467                 //create new listvector so you don't overwrite the clustering
468                 ListVector* newList = new ListVector(oldList);
469                 bool done = false;
470                 ifstream inOverlap;
471                 int count = 0;
472                 
473                 if (hclusterWanted) {  
474                         openInputFile(overlapFile, inOverlap);  
475                         if (inOverlap.eof()) {  done = true;  }
476                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
477                 
478                 while (!done) {
479                         if (m->control_pressed) { 
480                                 if (hclusterWanted) {   inOverlap.close();  }           
481                                 return newList;
482                         }
483                         
484                         //get next overlap
485                         seqDist overlapNode;
486                         if (!hclusterWanted) {  
487                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
488                                         overlapNode = overlapMatrix[count];
489                                         count++;
490                                 }else { break; }
491                         }else { 
492                                 if (!inOverlap.eof()) {
493                                         string firstName, secondName;
494                                         float overlapDistance;
495                                         inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
496                                         
497                                         map<string,int>::iterator itA = nameMap->find(firstName);
498                                         map<string,int>::iterator itB = nameMap->find(secondName);
499                                         if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
500                                         if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
501                                         
502                                         overlapNode.seq1 = itA->second;
503                                         overlapNode.seq2 = itB->second;
504                                         overlapNode.dist = overlapDistance;
505                                 }else { inOverlap.close(); break; }
506                         } 
507                 
508                         if (overlapNode.dist < dist) {
509                                 //get names of seqs that overlap
510                                 string name1 = nameMap->get(overlapNode.seq1);
511                                 string name2 = nameMap->get(overlapNode.seq2);
512                                 
513                                 //use binInfo to find out if they are already in the same bin
514                                 int binKeep = binInfo[name1];
515                                 int binRemove = binInfo[name2];
516                                 
517                                 //if not merge bins and update binInfo
518                                 if(binKeep != binRemove) {
519                                         //save names in old bin
520                                         string names = list->get(binRemove);
521                                         
522                                         //merge bins into name1s bin
523                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
524                                         newList->set(binRemove, "");    
525                                         
526                                         //update binInfo
527                                         while (names.find_first_of(',') != -1) { 
528                                                 //get name from bin
529                                                 string name = names.substr(0,names.find_first_of(','));
530                                                 //save name and bin number
531                                                 binInfo[name] = binKeep;
532                                                 names = names.substr(names.find_first_of(',')+1, names.length());
533                                         }
534                                         
535                                         //get last name
536                                         binInfo[names] = binKeep;
537                                 }
538                                 
539                         }else { done = true; }
540                 }
541                 
542                 //return listvector
543                 return newList;
544                                 
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
548                 exit(1);
549         }
550 }
551 //**********************************************************************************************************************
552 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
553         try {
554                 //sort distFile
555                 string sortedDistFile = sortFile(unsortedDist, outputDir);
556                 remove(unsortedDist.c_str());  //delete unsorted file
557                 distFile = sortedDistFile;
558                 
559                 //sort overlap file
560                 string sortedOverlapFile = sortFile(unsortedOverlap, outputDir);
561                 remove(unsortedOverlap.c_str());  //delete unsorted file
562                 overlapFile = sortedOverlapFile;
563         }
564         catch(exception& e) {
565                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
566                 exit(1);
567         }
568 }
569
570 //**********************************************************************************************************************
571
572
573
574
575