]> git.donarmstrong.com Git - mothur.git/blob - mgclustercommand.cpp
added checks for ^C to quit command instead of program
[mothur.git] / mgclustercommand.cpp
1 /*
2  *  mgclustercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 12/11/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mgclustercommand.h"
11
12 //**********************************************************************************************************************
13 MGClusterCommand::MGClusterCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string,string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("blast");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["blast"] = inputDir + it->second;            }
48                                 }
49                                 
50                                 it = parameters.find("name");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
56                                 }
57                         }
58
59                         
60                         //check for required parameters
61                         blastfile = validParameter.validFile(parameters, "blast", true);
62                         if (blastfile == "not open") { abort = true; }  
63                         else if (blastfile == "not found") { blastfile = ""; }
64                         
65                         //if the user changes the output directory command factory will send this info to us in the output parameter 
66                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
67                                 outputDir = ""; 
68                                 outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it  
69                         }
70                         
71                         namefile = validParameter.validFile(parameters, "name", true);
72                         if (namefile == "not open") { abort = true; }   
73                         else if (namefile == "not found") { namefile = ""; }
74                         
75                         if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
76                         
77                         //check for optional parameter and set defaults
78                         string temp;
79                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
80                         precisionLength = temp.length();
81                         convert(temp, precision); 
82                         
83                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
84                         convert(temp, cutoff); 
85                         cutoff += (5 / (precision * 10.0));
86                         
87                         method = validParameter.validFile(parameters, "method", false);
88                         if (method == "not found") { method = "furthest"; }
89                         
90                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
91                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
92
93                         temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
94                         convert(temp, length); 
95                         
96                         temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
97                         convert(temp, penalty); 
98                         
99                         temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
100                         minWanted = isTrue(temp); 
101                         
102                         temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
103                         merge = isTrue(temp); 
104                         
105                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
106                         hclusterWanted = isTrue(temp); 
107                 }
108
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
112                 exit(1);
113         }
114 }
115 //**********************************************************************************************************************
116
117 void MGClusterCommand::help(){
118         try {
119                 m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
120                 m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
121                 m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
122                 m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
123                 m->mothurOut("The precision parameter's default value is 100. \n");
124                 m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
125                 m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
126                 m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
127                 m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
128                 m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
129                 m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
130                 m->mothurOut("The mgcluster command should be in the following format: \n");
131                 m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
132                 m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "MGClusterCommand", "help");
136                 exit(1);
137         }
138 }
139 //**********************************************************************************************************************
140 MGClusterCommand::~MGClusterCommand(){}
141 //**********************************************************************************************************************
142 int MGClusterCommand::execute(){
143         try {
144                 
145                 if (abort == true) {    return 0;       }
146                 
147                 //read names file
148                 if (namefile != "") {
149                         nameMap = new NameAssignment(namefile);
150                         nameMap->readMap();
151                 }else{ nameMap= new NameAssignment(); }
152                 
153                 string fileroot = outputDir + getRootName(getSimpleName(blastfile));
154                 string tag = "";
155                 time_t start;
156                 float previousDist = 0.00000;
157                 float rndPreviousDist = 0.00000;
158                 
159                 //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
160                 //must remember to delete those objects here since readBlast does not
161                 read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
162                 read->read(nameMap);
163                 
164                 list = new ListVector(nameMap->getListVector());
165                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
166                 
167                 if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
168                 
169                 start = time(NULL);
170                 oldList = *list;
171                 
172                 if (method == "furthest")               { tag = "fn";  }
173                 else if (method == "nearest")   { tag = "nn";  }
174                 else                                                    { tag = "an";  }
175                 
176                 //open output files
177                 openOutputFile(fileroot+ tag + ".list",  listFile);
178                 openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
179                 openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
180                 
181                 if (m->control_pressed) { 
182                         delete nameMap; delete read; delete list; delete rabund; 
183                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
184                         return 0; 
185                 }
186                 
187                 if (!hclusterWanted) {
188                         //get distmatrix and overlap
189                         SparseMatrix* distMatrix = read->getDistMatrix();
190                         overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
191                         delete read;
192                 
193                         //create cluster
194                         if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
195                         else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
196                         else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
197                         cluster->setMapWanted(true);
198                         
199                         if (m->control_pressed) { 
200                                 delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
201                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
202                                 return 0; 
203                         }
204                         
205                         //cluster using cluster classes
206                         while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
207                                 
208                                 cluster->update(cutoff);
209                                 
210                                 if (m->control_pressed) { 
211                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
212                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
213                                         return 0; 
214                                 }
215                                 
216                                 float dist = distMatrix->getSmallDist();
217                                 float rndDist = roundDist(dist, precision);
218                                 
219                                 if(previousDist <= 0.0000 && dist != previousDist){
220                                         oldList.setLabel("unique");
221                                         printData(&oldList);
222                                 }
223                                 else if(rndDist != rndPreviousDist){
224                                         if (merge) {
225                                                 map<string, int> seq2Bin = cluster->getSeqtoBin();
226                                                 ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
227                                                 
228                                                 if (m->control_pressed) { 
229                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
230                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
231                                                         return 0; 
232                                                 }
233                                                 
234                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
235                                                 printData(temp);
236                                                 delete temp;
237                                         }else{
238                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
239                                                 printData(&oldList);
240                                         }
241                                 }
242                                 
243                                 previousDist = dist;
244                                 rndPreviousDist = rndDist;
245                                 oldList = *list;
246                         }
247                         
248                         if(previousDist <= 0.0000){
249                                 oldList.setLabel("unique");
250                                 printData(&oldList);
251                         }
252                         else if(rndPreviousDist<cutoff){
253                                 if (merge) {
254                                         map<string, int> seq2Bin = cluster->getSeqtoBin();
255                                         ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
256                                         
257                                         if (m->control_pressed) { 
258                                                         delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
259                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
260                                                         return 0; 
261                                         }
262                                         
263                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
264                                         printData(temp);
265                                         delete temp;
266                                 }else{
267                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
268                                         printData(&oldList);
269                                 }
270                         }
271                         
272                         //free memory
273                         overlapMatrix.clear();
274                         delete distMatrix;
275                         delete cluster;
276                         
277                 }else { //use hcluster to cluster
278                         //get distmatrix and overlap
279                         overlapFile = read->getOverlapFile();
280                         distFile = read->getDistFile(); 
281                         delete read;
282                 
283                         //sort the distance and overlap files
284                         sortHclusterFiles(distFile, overlapFile);
285                         
286                         if (m->control_pressed) { 
287                                 delete nameMap;  delete list; delete rabund; 
288                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
289                                 return 0; 
290                         }
291                 
292                         //create cluster
293                         hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
294                         hcluster->setMapWanted(true);
295                         
296                         vector<seqDist> seqs; seqs.resize(1); // to start loop
297                         //ifstream inHcluster;
298                         //openInputFile(distFile, inHcluster);
299                         
300                         if (m->control_pressed) { 
301                                 delete nameMap;  delete list; delete rabund; delete hcluster;
302                                 listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
303                                 return 0; 
304                         }
305
306                         while (seqs.size() != 0){
307                 
308                                 seqs = hcluster->getSeqs();
309                                 
310                                 if (m->control_pressed) { 
311                                         delete nameMap;  delete list; delete rabund; delete hcluster;
312                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
313                                         remove(distFile.c_str());
314                                         remove(overlapFile.c_str());
315                                         return 0; 
316                                 }
317                                 
318                                 for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
319                                         
320                                         if (seqs[i].seq1 != seqs[i].seq2) {
321                 
322                                                 hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
323                                                 
324                                                 if (m->control_pressed) { 
325                                                         delete nameMap;  delete list; delete rabund; delete hcluster;
326                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
327                                                         remove(distFile.c_str());
328                                                         remove(overlapFile.c_str());
329                                                         return 0; 
330                                                 }
331         
332                                                 float rndDist = roundDist(seqs[i].dist, precision);
333                                                                                                 
334                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
335                                                         oldList.setLabel("unique");
336                                                         printData(&oldList);
337                                                 }
338                                                 else if((rndDist != rndPreviousDist)){
339                                                         if (merge) {
340                                                                 map<string, int> seq2Bin = hcluster->getSeqtoBin();
341                                                                 ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
342                                                                 
343                                                                 if (m->control_pressed) { 
344                                                                         delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
345                                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
346                                                                         remove(distFile.c_str());
347                                                                         remove(overlapFile.c_str());
348                                                                         return 0; 
349                                                                 }
350
351                                                                 temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
352                                                                 printData(temp);
353                                                                 delete temp;
354                                                         }else{
355                                                                 oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
356                                                                 printData(&oldList);
357                                                         }
358                                                 }
359                                                 
360                                                 previousDist = seqs[i].dist;
361                                                 rndPreviousDist = rndDist;
362                                                 oldList = *list;
363                                         }
364                                 }
365                         }
366                         //inHcluster.close();
367                         
368                         if(previousDist <= 0.0000){
369                                 oldList.setLabel("unique");
370                                 printData(&oldList);
371                         }
372                         else if(rndPreviousDist<cutoff){
373                                 if (merge) {
374                                         map<string, int> seq2Bin = hcluster->getSeqtoBin();
375                                         ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
376                                         
377                                         if (m->control_pressed) { 
378                                                         delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
379                                                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
380                                                         remove(distFile.c_str());
381                                                         remove(overlapFile.c_str());
382                                                         return 0; 
383                                         }
384                                         
385                                         temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
386                                         printData(temp);
387                                         delete temp;
388                                 }else{
389                                         oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
390                                         printData(&oldList);
391                                 }
392                         }
393                         
394                         delete hcluster;
395                         remove(distFile.c_str());
396                         remove(overlapFile.c_str());
397                 }
398                 
399                 delete list; 
400                 delete rabund;
401                 listFile.close();
402                 sabundFile.close();
403                 rabundFile.close();
404         
405                 globaldata->setListFile(fileroot+ tag + ".list");
406                 globaldata->setFormat("list");
407                 
408                 if (m->control_pressed) { 
409                         delete nameMap; 
410                         listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
411                         globaldata->setListFile("");
412                         globaldata->setFormat("");
413                         return 0; 
414                 }
415                 
416                 m->mothurOutEndLine();
417                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
418                 m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   
419                 m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); 
420                 m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
421                 m->mothurOutEndLine();
422                 
423                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
424                         
425                 return 0;
426         }
427         catch(exception& e) {
428                 m->errorOut(e, "MGClusterCommand", "execute");
429                 exit(1);
430         }
431 }
432 //**********************************************************************************************************************
433 void MGClusterCommand::printData(ListVector* mergedList){
434         try {
435                 mergedList->print(listFile);
436                 mergedList->getRAbundVector().print(rabundFile);
437                 
438                 SAbundVector sabund = mergedList->getSAbundVector();
439
440                 sabund.print(cout);
441                 sabund.print(sabundFile);
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "MGClusterCommand", "printData");
445                 exit(1);
446         }
447 }
448 //**********************************************************************************************************************
449 //this merging is just at the reporting level; after this info is printed to the file it is gone and does not affect the data structures
450 //that are used to cluster by distance.  this is done so that the overlapping data does not have more influence than the distance data.
451 ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
452         try {
453                 //create new listvector so you don't overwrite the clustering
454                 ListVector* newList = new ListVector(oldList);
455                 bool done = false;
456                 ifstream inOverlap;
457                 int count = 0;
458                 
459                 if (hclusterWanted) {  
460                         openInputFile(overlapFile, inOverlap);  
461                         if (inOverlap.eof()) {  done = true;  }
462                 }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
463                 
464                 while (!done) {
465                         if (m->control_pressed) { 
466                                 if (hclusterWanted) {   inOverlap.close();  }           
467                                 return newList;
468                         }
469                         
470                         //get next overlap
471                         seqDist overlapNode;
472                         if (!hclusterWanted) {  
473                                 if (count < overlapMatrix.size()) { //do we have another node in the matrix
474                                         overlapNode = overlapMatrix[count];
475                                         count++;
476                                 }else { break; }
477                         }else { 
478                                 if (!inOverlap.eof()) {
479                                         string firstName, secondName;
480                                         float overlapDistance;
481                                         inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
482                                         
483                                         map<string,int>::iterator itA = nameMap->find(firstName);
484                                         map<string,int>::iterator itB = nameMap->find(secondName);
485                                         if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
486                                         if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
487                                         
488                                         overlapNode.seq1 = itA->second;
489                                         overlapNode.seq2 = itB->second;
490                                         overlapNode.dist = overlapDistance;
491                                 }else { inOverlap.close(); break; }
492                         } 
493                 
494                         if (overlapNode.dist < dist) {
495                                 //get names of seqs that overlap
496                                 string name1 = nameMap->get(overlapNode.seq1);
497                                 string name2 = nameMap->get(overlapNode.seq2);
498                                 
499                                 //use binInfo to find out if they are already in the same bin
500                                 int binKeep = binInfo[name1];
501                                 int binRemove = binInfo[name2];
502                                 
503                                 //if not merge bins and update binInfo
504                                 if(binKeep != binRemove) {
505                                         //save names in old bin
506                                         string names = list->get(binRemove);
507                                         
508                                         //merge bins into name1s bin
509                                         newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
510                                         newList->set(binRemove, "");    
511                                         
512                                         //update binInfo
513                                         while (names.find_first_of(',') != -1) { 
514                                                 //get name from bin
515                                                 string name = names.substr(0,names.find_first_of(','));
516                                                 //save name and bin number
517                                                 binInfo[name] = binKeep;
518                                                 names = names.substr(names.find_first_of(',')+1, names.length());
519                                         }
520                                         
521                                         //get last name
522                                         binInfo[names] = binKeep;
523                                 }
524                                 
525                         }else { done = true; }
526                 }
527                 
528                 //return listvector
529                 return newList;
530                                 
531         }
532         catch(exception& e) {
533                 m->errorOut(e, "MGClusterCommand", "mergeOPFs");
534                 exit(1);
535         }
536 }
537 //**********************************************************************************************************************
538 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
539         try {
540                 //sort distFile
541                 string sortedDistFile = sortFile(unsortedDist);
542                 remove(unsortedDist.c_str());  //delete unsorted file
543                 distFile = sortedDistFile;
544                 
545                 //sort overlap file
546                 string sortedOverlapFile = sortFile(unsortedOverlap);
547                 remove(unsortedOverlap.c_str());  //delete unsorted file
548                 overlapFile = sortedOverlapFile;
549         }
550         catch(exception& e) {
551                 m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
552                 exit(1);
553         }
554 }
555
556 //**********************************************************************************************************************
557
558
559
560
561