]> git.donarmstrong.com Git - mothur.git/blob - splitmatrix.cpp
changes to blastdb to make filenames given to blast unique, changes to split.abund...
[mothur.git] / splitmatrix.cpp
1 /*
2  *  splitmatrix.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitmatrix.h"
11 #include "phylotree.h"
12
13 /***********************************************************************/
14
15 SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, string t, bool l){
16         m = MothurOut::getInstance();
17         distFile = distfile;
18         cutoff = c;
19         namefile = name;
20         method = t;
21         taxFile = tax;
22         large = l;
23 }
24
25 /***********************************************************************/
26
27 int SplitMatrix::split(){
28         try {
29         
30                 if (method == "distance") {  
31                         splitDistance();
32                 }else if (method == "classify") {
33                         splitClassify();
34                 }else {
35                         m->mothurOut("Unknown splitting method, aborting split."); m->mothurOutEndLine();
36                         map<string, string> temp;
37                         temp[distFile] = namefile;
38                         dists.push_back(temp);
39                 }
40                 
41                 return 0;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "SplitMatrix", "split");
45                 exit(1);
46         }
47 }
48 /***********************************************************************/
49 int SplitMatrix::splitDistance(){
50         try {
51         
52                 if (large)      { splitDistanceLarge(); }
53                 else            { splitDistanceRAM();   }
54                         
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "SplitMatrix", "splitDistance");
58                 exit(1);
59         }
60 }
61
62 /***********************************************************************/
63 int SplitMatrix::splitClassify(){
64         try {
65                 cutoff = int(cutoff);
66                 
67                 map<string, int> seqGroup;
68                 map<string, int>::iterator it;
69                 map<string, int>::iterator it2;
70                 
71                 int numGroups = 0;
72                 
73                 //build tree from users taxonomy file
74                 PhyloTree* phylo = new PhyloTree();
75                 
76                 ifstream in;
77                 openInputFile(taxFile, in);
78                         
79                 //read in users taxonomy file and add sequences to tree
80                 string seqname, tax;
81                 while(!in.eof()){
82                         in >> seqname >> tax; gobble(in);
83                                 
84                         phylo->addSeqToTree(seqname, tax);
85                 }
86                 in.close();
87                 
88                 phylo->assignHeirarchyIDs(0);
89
90                 //make sure the cutoff is not greater than maxlevel
91                 if (cutoff > phylo->getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo->getMaxLevel(); }
92                 
93                 //for each node in tree
94                 for (int i = 0; i < phylo->getNumNodes(); i++) {
95                 
96                         //is this node within the cutoff
97                         TaxNode taxon = phylo->get(i);
98                 
99                         if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences
100                                 if (taxon.accessions.size() > 1) { //if this taxon just has one seq its a singleton
101                                         for (int j = 0; j < taxon.accessions.size(); j++) {
102                                                 seqGroup[taxon.accessions[j]] = numGroups;
103                                         }
104                                         numGroups++;
105                                 }
106                         }
107                 }
108
109                 ifstream dFile;
110                 openInputFile(distFile, dFile);
111                 ofstream outFile;
112                 
113                 for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
114                         remove((distFile + "." + toString(i) + ".temp").c_str());
115                 }
116                 
117                 
118                 //for buffering the io to improve speed
119                  //allow for 10 dists to be stored, then output.
120                 vector<string> outputs;  outputs.resize(numGroups, "");
121                 vector<int> numOutputs;  numOutputs.resize(numGroups, 0);       
122                 
123                 //for each distance
124                 while(dFile){
125                         string seqA, seqB;
126                         float dist;
127                         
128                         if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str());        } }
129                         
130                         dFile >> seqA >> seqB >> dist;  gobble(dFile);
131                         
132                         //if both sequences are in the same group then they are within the cutoff
133                         it = seqGroup.find(seqA);
134                         it2 = seqGroup.find(seqB);
135                         
136                         if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons 
137                                 if (it->second == it2->second) { //they are from the same group so add the distance
138                                         if (numOutputs[it->second] > 10) {
139                                                 openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
140                                                 outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl;
141                                                 outFile.close();
142                                                 outputs[it->second] = "";
143                                                 numOutputs[it->second] = 0;
144                                         }else{
145                                                 outputs[it->second] += seqA + '\t' + seqB + '\t' + toString(dist)  + '\n';
146                                                 numOutputs[it->second]++;
147                                         }
148                                 }
149                         }
150                 }
151                 dFile.close();
152         
153                 for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
154                         remove((namefile + "." + toString(i) + ".temp").c_str());
155                         
156                         //write out any remaining buffers
157                         if (numOutputs[it->second] > 0) {
158                                 openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
159                                 outFile << outputs[i];
160                                 outFile.close();
161                                 outputs[i] = "";
162                                 numOutputs[i] = 0;
163                         }
164                 }
165                 
166                 ifstream bigNameFile;
167                 openInputFile(namefile, bigNameFile);
168                 
169                 singleton = namefile + ".extra.temp";
170                 ofstream remainingNames;
171                 openOutputFile(singleton, remainingNames);
172                 
173                 bool wroteExtra = false;
174                                                 
175                 string name, nameList;
176                 while(!bigNameFile.eof()){
177                         bigNameFile >> name >> nameList;  gobble(bigNameFile);
178                         
179                         //did this sequence get assigned a group
180                         it = seqGroup.find(name);
181                         
182                         if (it != seqGroup.end()) {  
183                                 openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
184                                 outFile << name << '\t' << nameList << endl;
185                                 outFile.close();
186                         }else{
187                                 wroteExtra = true;
188                                 remainingNames << name << '\t' << nameList << endl;
189                         }
190                 }
191                 bigNameFile.close();
192                 remainingNames.close();
193                 
194                 if (!wroteExtra) { 
195                         remove(singleton.c_str());
196                         singleton = "none";
197                 }
198                         
199                 for(int i=0;i<numGroups;i++){
200                         string tempNameFile = namefile + "." + toString(i) + ".temp";
201                         string tempDistFile = distFile + "." + toString(i) + ".temp";
202                                 
203                         map<string, string> temp;
204                         temp[tempDistFile] = tempNameFile;
205                         dists.push_back(temp);
206                 }
207                 
208                 if (m->control_pressed)  {  
209                         for (int i = 0; i < dists.size(); i++) { 
210                                 remove((dists[i].begin()->first).c_str());
211                                 remove((dists[i].begin()->second).c_str());
212                         }
213                         dists.clear();
214                 }
215                 
216                 return 0;
217                         
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "SplitMatrix", "splitClassify");
221                 exit(1);
222         }
223 }
224 /***********************************************************************/
225 int SplitMatrix::splitDistanceLarge(){
226         try {
227                 vector<set<string> > groups;
228                 
229                 //for buffering the io to improve speed
230                  //allow for 30 dists to be stored, then output.
231                 vector<string> outputs;
232                 vector<int> numOutputs;
233                 vector<bool> wroteOutPut;
234                 
235                 int numGroups = 0;
236
237                 ofstream outFile;
238                 ifstream dFile;
239                 openInputFile(distFile, dFile);
240         
241                 while(dFile){
242                         string seqA, seqB;
243                         float dist;
244
245                         dFile >> seqA >> seqB >> dist;
246                         
247                         if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
248                                         
249                         if(dist < cutoff){
250                                 //cout << "in cutoff: " << dist << endl;
251                                 int groupIDA = -1;
252                                 int groupIDB = -1;
253                                 int groupID = -1;
254                                 
255                                 for(int i=0;i<numGroups;i++){
256                                         set<string>::iterator aIt = groups[i].find(seqA);
257                                         set<string>::iterator bIt = groups[i].find(seqB);
258                                         
259                                         if(groupIDA == -1 && aIt != groups[i].end()){//seqA is not already assigned to a group and is in group[i], so assign seqB to group[i]
260                                                 groups[i].insert(seqB);
261                                                 groupIDA = i;
262                                                 groupID = groupIDA;
263
264                                                 //cout << "in aIt: " << groupID << endl;
265         //                                      break;
266                                         }
267                                         else if(groupIDB == -1 && bIt != groups[i].end()){//seqB is not already assigned to a group and is in group[i], so assign seqA to group[i]
268                                                 groups[i].insert(seqA);
269                                                 groupIDB = i;
270                                                 groupID = groupIDB;
271
272                                         //      cout << "in bIt: " << groupID << endl;
273         //                                      break;
274                                         }
275                                 
276                                         if(groupIDA != -1 && groupIDB != -1){//both ifs above have been executed, so we need to decide who to assign them to
277                                                 if(groupIDA < groupIDB){
278                                                 //      cout << "A: " << groupIDA << "\t" << groupIDB << endl;
279                                                         groups[groupIDA].insert(groups[groupIDB].begin(), groups[groupIDB].end()); //merge two groups into groupIDA
280                                                         groups[groupIDB].clear(); 
281                                                         groupID = groupIDA;
282                                                 }
283                                                 else{
284                                                 //      cout << "B: " << groupIDA << "\t" << groupIDB << endl;
285                                                         groups[groupIDB].insert(groups[groupIDA].begin(), groups[groupIDA].end()); //merge two groups into groupIDB
286                                                         groups[groupIDA].clear();  
287                                                         groupID = groupIDB;
288                                                 }
289                                                 break;
290                                         }
291                                 }
292                                 
293         //windows is gonna gag on the reuse of outFile, will need to make it local...
294                                 
295                                 if(groupIDA == -1 && groupIDB == -1){ //we need a new group
296                                         set<string> newGroup;
297                                         newGroup.insert(seqA);
298                                         newGroup.insert(seqB);
299                                         groups.push_back(newGroup);
300                                                                         
301                                         string tempOut = seqA + '\t' + seqB + '\t' + toString(dist) + '\n';
302                                         outputs.push_back(tempOut);
303                                         numOutputs.push_back(1);
304                                         wroteOutPut.push_back(false);
305                                         
306                                         numGroups++;
307                                 }
308                                 else{
309                                         string fileName = distFile + "." + toString(groupID) + ".temp";
310                                                                                         
311                                         //have we reached the max buffer size
312                                         if (numOutputs[groupID] > 60) { //write out sequence
313                                                 outFile.open(fileName.c_str(), ios::app);
314                                                 outFile << outputs[groupID] << seqA << '\t' << seqB << '\t' << dist << endl;
315                                                 outFile.close();
316                                                 
317                                                 outputs[groupID] = "";
318                                                 numOutputs[groupID] = 0;
319                                                 wroteOutPut[groupID] = true;
320                                         }else {
321                                                 outputs[groupID] +=  seqA + '\t' + seqB + '\t' + toString(dist)  + '\n';
322                                                 numOutputs[groupID]++;
323                                         }
324                                         
325                                         if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above
326                                                 string row, column, distance;
327                                                 if(groupIDA<groupIDB){
328                                                         
329                                                         //merge memory
330                                                         numOutputs[groupID] += numOutputs[groupIDB];
331                                                         outputs[groupID] += outputs[groupIDB];
332                                                         
333                                                         outputs[groupIDB] = "";
334                                                         numOutputs[groupIDB] = 0;
335                                                         
336                                                         //if groupB is written to file it is above buffer size so read and write to new merged file
337                                                         if (wroteOutPut[groupIDB]) {
338                                                                 string fileName2 = distFile + "." + toString(groupIDB) + ".temp";
339                                                                 ifstream fileB(fileName2.c_str(), ios::ate);
340                                                                 
341                                                                 outFile.open(fileName.c_str(), ios::app);
342                                                                 
343                                                                 long size;
344                                                                 char* memblock;
345
346                                                                 size = fileB.tellg();
347                                 
348                                                                 fileB.seekg (0, ios::beg);
349                                                                 
350                                                                 int numRead = size / 1024;
351                                                                 int lastRead = size % 1024;
352
353                                                                 for (int i = 0; i < numRead; i++) {
354                                 
355                                                                         memblock = new char [1024];
356                                                                 
357                                                                         fileB.read (memblock, 1024);
358                                                                         
359                                                                         string temp = memblock;
360                                                                         outFile << temp.substr(0, 1024);
361                                                                         
362                                                                         delete memblock;
363                                                                 }
364                                                                 
365                                                                 memblock = new char [lastRead];
366                                                                 
367                                                                 fileB.read (memblock, lastRead);
368                                                                 
369                                                                 //not sure why but it will read more than lastRead char...??
370                                                                 string temp = memblock;
371                                                                 outFile << temp.substr(0, lastRead);
372                                                                 delete memblock;
373                                                                 
374                                                                 fileB.close();
375                                                                 remove(fileName2.c_str());
376                                                                 
377                                                                 //write out the merged memory
378                                                                 if (numOutputs[groupID] > 60) {
379                                                                         outFile << outputs[groupID];
380                                                                         outputs[groupID] = "";
381                                                                         numOutputs[groupID] = 0;
382                                                                 }
383                                                                 
384                                                                 outFile.close();
385                                                                 
386                                                                 wroteOutPut[groupID] = true;
387                                                                 wroteOutPut[groupIDB] = false;
388                                                         }else{ } //just merge b's memory with a's memory 
389                                                 }
390                                                 else{
391                                                         numOutputs[groupID] += numOutputs[groupIDA];
392                                                         outputs[groupID] += outputs[groupIDA];
393                                                         
394                                                         outputs[groupIDA] = "";
395                                                         numOutputs[groupIDA] = 0;
396                                                         
397                                                         if (wroteOutPut[groupIDA]) {
398                                                                 string fileName2 = distFile + "." + toString(groupIDA) + ".temp";
399                                                                 ifstream fileB(fileName2.c_str(), ios::ate);
400                                                                 
401                                                                 outFile.open(fileName.c_str(), ios::app);
402                                                                 
403                                                                 long size;
404                                                                 char* memblock;
405
406                                                                 size = fileB.tellg();
407                                                                                                                         
408                                                                 fileB.seekg (0, ios::beg);
409                                                                 
410                                                                 int numRead = size / 1024;
411                                                                 int lastRead = size % 1024;
412
413                                                                 for (int i = 0; i < numRead; i++) {
414                                 
415                                                                         memblock = new char [1024];
416                                                                 
417                                                                         fileB.read (memblock, 1024);
418                                                                         string temp = memblock;
419                                                                         outFile << temp.substr(0, 1024);
420                                                                         
421                                                                         delete memblock;
422                                                                 }
423                                                                 
424                                                                 memblock = new char [lastRead];
425                                                                 
426                                                                 fileB.read (memblock, lastRead);
427                                                                 
428                                                                 //not sure why but it will read more than lastRead char...??
429                                                                 string temp = memblock;
430                                                                 outFile << temp.substr(0, lastRead);
431                                                                         
432                                                                 delete memblock;
433                                                                 
434                                                                 fileB.close();
435                                                                 remove(fileName2.c_str());
436                                                                 
437                                                                 //write out the merged memory
438                                                                 if (numOutputs[groupID] > 60) {
439                                                                         outFile << outputs[groupID];
440                                                                         outputs[groupID] = "";
441                                                                         numOutputs[groupID] = 0;
442                                                                 }
443                                                                 
444                                                                 outFile.close();
445                                                                 
446                                                                 wroteOutPut[groupID] = true;
447                                                                 wroteOutPut[groupIDA] = false;
448                                                         }else { } //just merge memory
449                                                 }                                       
450                                         }
451                                 }
452                         }
453                         gobble(dFile);
454                 }
455                 dFile.close();
456                 
457                 for (int i = 0; i < numGroups; i++) {
458                         if (numOutputs[i] > 0) {
459                                 string fileName = distFile + "." + toString(i) + ".temp";
460                                 outFile.open(fileName.c_str(), ios::app);
461                                 outFile << outputs[i];
462                                 outFile.close();
463                         }
464                 }
465
466                 splitNames(groups);
467                                 
468                 return 0;                       
469         }
470         catch(exception& e) {
471                 m->errorOut(e, "SplitMatrix", "splitDistanceLarge");
472                 exit(1);
473         }
474 }
475 //********************************************************************************************************************
476 int SplitMatrix::splitNames(vector<set<string> >& groups){
477         try {
478                 int numGroups = groups.size();
479         
480                 ifstream bigNameFile(namefile.c_str());
481                 if(!bigNameFile){
482                         cerr << "Error: We can't open the name file\n";
483                         exit(1);
484                 }
485                 
486                 map<string, string> nameMap;
487                 string name, nameList;
488                 while(bigNameFile){
489                         bigNameFile >> name >> nameList;
490                         nameMap[name] = nameList;
491                         gobble(bigNameFile);
492                 }
493                 bigNameFile.close();
494                         
495                 for(int i=0;i<numGroups;i++){  //parse names file to match distance files
496                         int numSeqsInGroup = groups[i].size();
497                         
498                         if(numSeqsInGroup > 0){
499                                 string fileName = namefile + "." + toString(i) + ".temp";
500                                 ofstream smallNameFile(fileName.c_str(), ios::ate);
501                                 
502                                 for(set<string>::iterator gIt=groups[i].begin();gIt!=groups[i].end();gIt++){
503                                         map<string,string>::iterator nIt = nameMap.find(*gIt);
504                                         if (nIt != nameMap.end()) {
505                                                 smallNameFile << nIt->first << '\t' << nIt->second << endl;
506                                                 nameMap.erase(nIt);
507                                         }else{
508                                                 m->mothurOut((*gIt) + " is in your distance file and not in your namefile.  Please correct."); m->mothurOutEndLine(); exit(1);
509                                         }
510                                 }
511                                 smallNameFile.close();
512                         }
513                 }
514                 
515                 //names of singletons
516                 if (nameMap.size() != 0) {
517                         singleton = namefile + ".extra.temp";
518                         ofstream remainingNames(singleton.c_str(), ios::ate);
519                         for(map<string,string>::iterator nIt=nameMap.begin();nIt!=nameMap.end();nIt++){
520                                 remainingNames << nIt->first << '\t' << nIt->second << endl;
521                         }
522                         remainingNames.close();
523                 }else { singleton = "none"; }
524                         
525                 for(int i=0;i<numGroups;i++){
526                         if(groups[i].size() > 0){
527                                 string tempNameFile = namefile + "." + toString(i) + ".temp";
528                                 string tempDistFile = distFile + "." + toString(i) + ".temp";
529                                 
530                                 map<string, string> temp;
531                                 temp[tempDistFile] = tempNameFile;
532                                 dists.push_back(temp);
533                         }
534                 }
535                 
536                 if (m->control_pressed)  {  
537                         for (int i = 0; i < dists.size(); i++) { 
538                                 remove((dists[i].begin()->first).c_str());
539                                 remove((dists[i].begin()->second).c_str());
540                         }
541                         dists.clear();
542                 }
543                 
544                 return 0;
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "SplitMatrix", "splitNames");
548                 exit(1);
549         }
550 }
551 //********************************************************************************************************************
552 int SplitMatrix::splitDistanceRAM(){
553         try {
554                 vector<set<string> > groups;
555                 vector<string> outputs;
556                 
557                 int numGroups = 0;
558
559                 ifstream dFile;
560                 openInputFile(distFile, dFile);
561
562                 while(dFile){
563                         string seqA, seqB;
564                         float dist;
565
566                         dFile >> seqA >> seqB >> dist;
567                         
568                         if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
569                                         
570                         if(dist < cutoff){
571                                 //cout << "in cutoff: " << dist << endl;
572                                 int groupIDA = -1;
573                                 int groupIDB = -1;
574                                 int groupID = -1;
575                                 
576                                 for(int i=0;i<numGroups;i++){
577                                         set<string>::iterator aIt = groups[i].find(seqA);
578                                         set<string>::iterator bIt = groups[i].find(seqB);
579                                         
580                                         if(groupIDA == -1 && aIt != groups[i].end()){//seqA is not already assigned to a group and is in group[i], so assign seqB to group[i]
581                                                 groups[i].insert(seqB);
582                                                 groupIDA = i;
583                                                 groupID = groupIDA;
584
585                                                 //cout << "in aIt: " << groupID << endl;
586         //                                      break;
587                                         }
588                                         else if(groupIDB == -1 && bIt != groups[i].end()){//seqB is not already assigned to a group and is in group[i], so assign seqA to group[i]
589                                                 groups[i].insert(seqA);
590                                                 groupIDB = i;
591                                                 groupID = groupIDB;
592
593                                         //      cout << "in bIt: " << groupID << endl;
594         //                                      break;
595                                         }
596                                 
597                                         if(groupIDA != -1 && groupIDB != -1){//both ifs above have been executed, so we need to decide who to assign them to
598                                                 if(groupIDA < groupIDB){
599                                                 //      cout << "A: " << groupIDA << "\t" << groupIDB << endl;
600                                                         groups[groupIDA].insert(groups[groupIDB].begin(), groups[groupIDB].end()); //merge two groups into groupIDA
601                                                         groups[groupIDB].clear(); 
602                                                         groupID = groupIDA;
603                                                 }
604                                                 else{
605                                                 //      cout << "B: " << groupIDA << "\t" << groupIDB << endl;
606                                                         groups[groupIDB].insert(groups[groupIDA].begin(), groups[groupIDA].end()); //merge two groups into groupIDB
607                                                         groups[groupIDA].clear();  
608                                                         groupID = groupIDB;
609                                                 }
610                                                 break;
611                                         }
612                                 }
613                                 
614         //windows is gonna gag on the reuse of outFile, will need to make it local...
615                                 
616                                 if(groupIDA == -1 && groupIDB == -1){ //we need a new group
617                                         set<string> newGroup;
618                                         newGroup.insert(seqA);
619                                         newGroup.insert(seqB);
620                                         groups.push_back(newGroup);
621                                                                         
622                                         string tempOut = seqA + '\t' + seqB + '\t' + toString(dist) + '\n';
623                                         outputs.push_back(tempOut);
624                                         numGroups++;
625                                 }
626                                 else{
627                                                                                         
628                                         outputs[groupID] +=  seqA + '\t' + seqB + '\t' + toString(dist)  + '\n';
629                                         
630                                         if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above
631                                                 string row, column, distance;
632                                                 if(groupIDA<groupIDB){
633                                                         //merge memory
634                                                         outputs[groupID] += outputs[groupIDB];
635                                                         outputs[groupIDB] = "";
636                                                 }else{
637                                                         outputs[groupID] += outputs[groupIDA];
638                                                         outputs[groupIDA] = "";
639                                                 }                                       
640                                         }
641                                 }
642                         }
643                         gobble(dFile);
644                 }
645                 dFile.close();
646                 
647                 for (int i = 0; i < numGroups; i++) {
648                         if (outputs[i] != "") {
649                                 ofstream outFile;
650                                 string fileName = distFile + "." + toString(i) + ".temp";
651                                 outFile.open(fileName.c_str(), ios::ate);
652                                 outFile << outputs[i];
653                                 outFile.close();
654                         }
655                 }
656
657                 splitNames(groups);
658                                 
659                 return 0;                       
660         }
661         catch(exception& e) {
662                 m->errorOut(e, "SplitMatrix", "splitDistanceRAM");
663                 exit(1);
664         }
665 }
666 //********************************************************************************************************************
// Returns the size in bytes of the file named by the first key of `entry`.
// Yields 0 when the map is empty or when the file cannot be opened or measured;
// an open failure is additionally reported with perror.
inline long sizeOfFirstFile(const std::map<std::string, std::string>& entry){

        if (entry.empty()) { return 0; }        //no file name to look at

        const std::string& filename = entry.begin()->first;

        FILE * pFile = fopen (filename.c_str(),"rb");
        if (pFile==NULL) {
                const std::string error = "Error opening " + filename;
                perror (error.c_str());
                return 0;
        }

        //get num bytes in file
        long size = 0;
        if (fseek (pFile, 0, SEEK_END) == 0) {
                size = ftell (pFile);
                if (size < 0) { size = 0; }     //ftell reports failure with -1
        }
        fclose (pFile);

        return size;
}
//********************************************************************************************************************
//sorts biggest to smallest
// Comparator for sort: true when the file named by the first key of `left` is strictly larger
// than the file named by the first key of `right`. Files that cannot be measured count as size 0.
inline bool compareFileSizes(const std::map<std::string, std::string>& left, const std::map<std::string, std::string>& right){

        const long leftsize = sizeOfFirstFile(left);
        const long rightsize = sizeOfFirstFile(right);

        return (leftsize > rightsize);
}
700 /***********************************************************************/
701 //returns map of distance files -> namefile sorted by distance file size
702 vector< map< string, string> > SplitMatrix::getDistanceFiles(){
703         try {   
704                 
705                 sort(dists.begin(), dists.end(), compareFileSizes);
706                 
707                 return dists;
708         }
709         catch(exception& e) {
710                 m->errorOut(e, "SplitMatrix", "getDistanceFiles");
711                 exit(1);
712         }
713 }
714 /***********************************************************************/
715 SplitMatrix::~SplitMatrix(){}
716 /***********************************************************************/
717