git.donarmstrong.com Git - mothur.git/blobdiff - splitmatrix.cpp
changes while testing
[mothur.git] / splitmatrix.cpp
index 9e53c51a8f1b6fbba8e755279e68993fb7f03fa5..f6b5c4d81cf5446adc5e5253808380be7ebbf28d 100644 (file)
 #include "splitmatrix.h"
 #include "phylotree.h"
 #include "distancecommand.h"
+#include "seqsummarycommand.h"
 
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, string t, bool l){
+SplitMatrix::SplitMatrix(string distfile, string name, string count, string tax, float c, string t, bool l){
        m = MothurOut::getInstance();
        distFile = distfile;
        cutoff = c;
        namefile = name;
        method = t;
        taxFile = tax;
+    countfile = count;
        large = l;
 }
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p){
+SplitMatrix::SplitMatrix(string ffile, string name, string count, string tax, float c, float cu, string t, int p, bool cl, string output){
        m = MothurOut::getInstance();
        fastafile = ffile;
        namefile = name;
+    countfile = count;
        taxFile = tax;
-       cutoff = c;
+       cutoff = c;  //tax level cutoff
+       distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that
        method = t;
        processors = p;
+    classic = cl;
+       outputDir = output;
 }
 
 /***********************************************************************/
@@ -46,7 +52,8 @@ int SplitMatrix::split(){
                }else {
                        m->mothurOut("Unknown splitting method, aborting split."); m->mothurOutEndLine();
                        map<string, string> temp;
-                       temp[distFile] = namefile;
+                       if (namefile != "") {  temp[distFile] = namefile; }
+            else { temp[distFile] = countfile; }
                        dists.push_back(temp);
                }
                
@@ -63,6 +70,8 @@ int SplitMatrix::splitDistance(){
         
                if (large)      { splitDistanceLarge(); }
                else            { splitDistanceRAM();   }
+               
+               return 0;
                        
        }
        catch(exception& e) {
@@ -75,7 +84,7 @@ int SplitMatrix::splitDistance(){
 int SplitMatrix::splitClassify(){
        try {
                cutoff = int(cutoff);
-               
+                               
                map<string, int> seqGroup;
                map<string, int>::iterator it;
                map<string, int>::iterator it2;
@@ -85,16 +94,13 @@ int SplitMatrix::splitClassify(){
                //build tree from users taxonomy file
                PhyloTree* phylo = new PhyloTree();
                
-               ifstream in;
-               openInputFile(taxFile, in);
-                       
-               //read in users taxonomy file and add sequences to tree
-               string seqname, tax;
-               while(!in.eof()){
-                       in >> seqname >> tax; gobble(in);
-                       phylo->addSeqToTree(seqname, tax);
-               }
-               in.close();
+        map<string, string> temp;
+        m->readTax(taxFile, temp);
+        
+        for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+            phylo->addSeqToTree(itTemp->first, itTemp->second);
+            temp.erase(itTemp++);
+        }
                
                phylo->assignHeirarchyIDs(0);
 
@@ -141,29 +147,29 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                set<string> names;
                                
                for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
-                       remove((fastafile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
                }
                        
                ifstream in;
-               openInputFile(fastafile, in);
+               m->openInputFile(fastafile, in);
        
                //parse fastafile
                ofstream outFile;
                while (!in.eof()) {
-                       Sequence query(in); gobble(in);
+                       Sequence query(in); m->gobble(in);
                        if (query.getName() != "") {
                
                                it = seqGroup.find(query.getName());
                                
                                //save names in case no namefile is given
-                               if (namefile == "") {  names.insert(query.getName()); }
+                               if ((namefile == "") && (countfile == "")) {  names.insert(query.getName()); }
                        
                                if (it != seqGroup.end()) { //not singleton 
-                                       openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
+                                       m->openOutputFileAppend((fastafile + "." + toString(it->second) + ".temp"), outFile);
                                        query.printSequence(outFile); 
                                        outFile.close();
                                        
-                                       copyGroups.erase(it);
+                                       copyGroups.erase(query.getName());
                                }
                        }
                }
@@ -176,74 +182,46 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                }
                
                copyGroups.clear();
-               
+        
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
-                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors);
+                       string options = "";
+            if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+            else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
+                       if (outputDir != "") { options += ", outputdir=" + outputDir; }
                        
+            m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+            
                        Command* command = new DistanceCommand(options);
+                       
+            m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+            
                        command->execute();
                        delete command;
                        
-                       remove((fastafile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
                        
                        //remove old names files just in case
-                       remove((namefile + "." + toString(i) + ".temp").c_str());
+                       if (namefile != "") { m->mothurRemove((namefile + "." + toString(i) + ".temp")); }
+            else { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
                }
-               
-               singleton = namefile + ".extra.temp";
-               ofstream remainingNames;
-               openOutputFile(singleton, remainingNames);
-               
-               bool wroteExtra = false;
-
-               ifstream bigNameFile;
-               openInputFile(namefile, bigNameFile);
-               
-               string name, nameList;
-               while(!bigNameFile.eof()){
-                       bigNameFile >> name >> nameList;  gobble(bigNameFile);
-                       
-                       //did this sequence get assigned a group
-                       it = seqGroup.find(name);
-                       
-                       if (it != seqGroup.end()) {  
-                               openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
-                               outFile << name << '\t' << nameList << endl;
-                               outFile.close();
-                       }else{
-                               wroteExtra = true;
-                               remainingNames << name << '\t' << nameList << endl;
-                       }
-               }
-               bigNameFile.close();
-               
-               remainingNames.close();
-               if (!wroteExtra) { 
-                       remove(singleton.c_str());
-                       singleton = "none";
-               }
-
-               for(int i=0;i<numGroups;i++){
-                       string tempNameFile = namefile + "." + toString(i) + ".temp";
-                       string tempDistFile = getRootName(getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
-
-                       //if there are valid distances
-                       ifstream fileHandle;
-                       fileHandle.open(tempDistFile.c_str());
-                       if(fileHandle)  {       
-                               gobble(fileHandle);
-                               if (!fileHandle.eof()) {  //check for blank file
-                                       map<string, string> temp;
-                                       temp[tempDistFile] = tempNameFile;
-                                       dists.push_back(temp);
-                               }
-                       }
-                       fileHandle.close();
-               }
-               
-               if (m->control_pressed)  {  for (int i = 0; i < dists.size(); i++) { remove((dists[i].begin()->first).c_str()); remove((dists[i].begin()->second).c_str()); } dists.clear(); }
+        
+        //restore old fasta file name since dist.seqs overwrites it with the temp files
+        m->setFastaFile(fastafile);
+        
+        vector<string> tempDistFiles;    
+        for(int i=0;i<numGroups;i++){
+            if (outputDir == "") { outputDir = m->hasPath(fastafile); }
+            string tempDistFile = "";
+            if (classic) { tempDistFile =  outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+            else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
+            tempDistFiles.push_back(tempDistFile);
+        }
+        
+        splitNames(seqGroup, numGroups, tempDistFiles);
+        
+               if (m->control_pressed)  {  for (int i = 0; i < dists.size(); i++) { m->mothurRemove((dists[i].begin()->first)); m->mothurRemove((dists[i].begin()->second)); } dists.clear(); }
                
                return 0;
        }
@@ -258,12 +236,13 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                map<string, int>::iterator it;
                map<string, int>::iterator it2;
                
+        ofstream outFile;
                ifstream dFile;
-               openInputFile(distFile, dFile);
-               ofstream outFile;
+               m->openInputFile(distFile, dFile);
+               
                
                for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
-                       remove((distFile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((distFile + "." + toString(i) + ".temp"));
                }
                
                //for buffering the io to improve speed
@@ -280,9 +259,9 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                        string seqA, seqB;
                        float dist;
                        
-                       if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str());        } }
+                       if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { m->mothurRemove((distFile + "." + toString(i) + ".temp"));       } }
                        
-                       dFile >> seqA >> seqB >> dist;  gobble(dFile);
+                       dFile >> seqA >> seqB >> dist;  m->gobble(dFile);
                        
                        //if both sequences are in the same group then they are within the cutoff
                        it = seqGroup.find(seqA);
@@ -291,7 +270,7 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                        if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons 
                                if (it->second == it2->second) { //they are from the same group so add the distance
                                        if (numOutputs[it->second] > 30) {
-                                               openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+                                               m->openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
                                                outFile << outputs[it->second] << seqA << '\t' << seqB << '\t' << dist << endl;
                                                outFile.close();
                                                outputs[it->second] = "";
@@ -305,13 +284,19 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                        }
                }
                dFile.close();
-       
+        
+        string inputFile = namefile;
+        if (countfile != "") { inputFile = countfile; }
+        
+        vector<string> tempDistFiles;
                for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
-                       remove((namefile + "." + toString(i) + ".temp").c_str());
+            string tempDistFile = distFile + "." + toString(i) + ".temp";
+            tempDistFiles.push_back(tempDistFile);
+                       m->mothurRemove((inputFile + "." + toString(i) + ".temp"));
                        
                        //write out any remaining buffers
                        if (numOutputs[i] > 0) {
-                               openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
+                               m->openOutputFileAppend((distFile + "." + toString(i) + ".temp"), outFile);
                                outFile << outputs[i];
                                outFile.close();
                                outputs[i] = "";
@@ -320,67 +305,12 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                        }
                }
                
-               ifstream bigNameFile;
-               openInputFile(namefile, bigNameFile);
-               
-               singleton = namefile + ".extra.temp";
-               ofstream remainingNames;
-               openOutputFile(singleton, remainingNames);
-               
-               bool wroteExtra = false;
-                                               
-               string name, nameList;
-               while(!bigNameFile.eof()){
-                       bigNameFile >> name >> nameList;  gobble(bigNameFile);
-                       
-                       //did this sequence get assigned a group
-                       it = seqGroup.find(name);
-                       
-                       if (it != seqGroup.end()) {  
-                               openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
-                               outFile << name << '\t' << nameList << endl;
-                               outFile.close();
-                       }else{
-                               wroteExtra = true;
-                               remainingNames << name << '\t' << nameList << endl;
-                       }
-               }
-               bigNameFile.close();
-                               
-               for(int i=0;i<numGroups;i++){
-                       string tempNameFile = namefile + "." + toString(i) + ".temp";
-                       string tempDistFile = distFile + "." + toString(i) + ".temp";
-
-                       //if there are valid distances
-                       if (validDistances[i]) {
-                               map<string, string> temp;
-                               temp[tempDistFile] = tempNameFile;
-                               dists.push_back(temp);
-                       }else{
-                               ifstream in;
-                               openInputFile(tempNameFile, in);
-                               
-                               while(!in.eof()) { 
-                                       in >> name >> nameList;  gobble(in);
-                                       wroteExtra = true;
-                                       remainingNames << name << '\t' << nameList << endl;
-                               }
-                               in.close();
-                               remove(tempNameFile.c_str());
-                       }
-               }
-               
-               remainingNames.close();
-               
-               if (!wroteExtra) { 
-                       remove(singleton.c_str());
-                       singleton = "none";
-               }
-
+        splitNames(seqGroup, numGroups, tempDistFiles);
+        
                if (m->control_pressed)  {  
                        for (int i = 0; i < dists.size(); i++) { 
-                               remove((dists[i].begin()->first).c_str());
-                               remove((dists[i].begin()->second).c_str());
+                               m->mothurRemove((dists[i].begin()->first));
+                               m->mothurRemove((dists[i].begin()->second));
                        }
                        dists.clear();
                }
@@ -405,9 +335,9 @@ int SplitMatrix::splitDistanceLarge(){
                
                int numGroups = 0;
 
-               ofstream outFile;
+               //ofstream outFile;
                ifstream dFile;
-               openInputFile(distFile, dFile);
+               m->openInputFile(distFile, dFile);
        
                while(dFile){
                        string seqA, seqB;
@@ -415,7 +345,7 @@ int SplitMatrix::splitDistanceLarge(){
 
                        dFile >> seqA >> seqB >> dist;
                        
-                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
+                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  m->mothurRemove((distFile + "." + toString(i) + ".temp")); }  } return 0; }
                                        
                        if(dist < cutoff){
                                //cout << "in cutoff: " << dist << endl;
@@ -481,6 +411,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                                                        
                                        //have we reached the max buffer size
                                        if (numOutputs[groupID] > 60) { //write out sequence
+                        ofstream outFile;
                                                outFile.open(fileName.c_str(), ios::app);
                                                outFile << outputs[groupID] << seqA << '\t' << seqB << '\t' << dist << endl;
                                                outFile.close();
@@ -507,7 +438,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                        //if groupB is written to file it is above buffer size so read and write to new merged file
                                                        if (wroteOutPut[groupIDB]) {
                                                                string fileName2 = distFile + "." + toString(groupIDB) + ".temp";
-                                                               ifstream fileB(fileName2.c_str(), ios::ate);
+                                                               /*ifstream fileB(fileName2.c_str(), ios::ate);
                                                                
                                                                outFile.open(fileName.c_str(), ios::app);
                                                                
@@ -542,17 +473,22 @@ int SplitMatrix::splitDistanceLarge(){
                                                                outFile << temp.substr(0, lastRead);
                                                                delete memblock;
                                                                
-                                                               fileB.close();
-                                                               remove(fileName2.c_str());
+                                                               fileB.close();*/
+                                m->appendFiles(fileName2, fileName);
+                                                               m->mothurRemove(fileName2);
+                        
                                                                
                                                                //write out the merged memory
                                                                if (numOutputs[groupID] > 60) {
-                                                                       outFile << outputs[groupID];
+                                    ofstream tempOut;
+                                    m->openOutputFile(fileName, tempOut);
+                                                                       tempOut << outputs[groupID];
                                                                        outputs[groupID] = "";
                                                                        numOutputs[groupID] = 0;
+                                    tempOut.close();
                                                                }
                                                                
-                                                               outFile.close();
+                                                               //outFile.close();
                                                                
                                                                wroteOutPut[groupID] = true;
                                                                wroteOutPut[groupIDB] = false;
@@ -567,7 +503,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                        
                                                        if (wroteOutPut[groupIDA]) {
                                                                string fileName2 = distFile + "." + toString(groupIDA) + ".temp";
-                                                               ifstream fileB(fileName2.c_str(), ios::ate);
+                                                               /*ifstream fileB(fileName2.c_str(), ios::ate);
                                                                
                                                                outFile.open(fileName.c_str(), ios::app);
                                                                
@@ -602,17 +538,21 @@ int SplitMatrix::splitDistanceLarge(){
                                                                        
                                                                delete memblock;
                                                                
-                                                               fileB.close();
-                                                               remove(fileName2.c_str());
+                                                               fileB.close();*/
+                                m->appendFiles(fileName2, fileName);
+                                                               m->mothurRemove(fileName2);
                                                                
                                                                //write out the merged memory
                                                                if (numOutputs[groupID] > 60) {
-                                                                       outFile << outputs[groupID];
+                                    ofstream tempOut;
+                                    m->openOutputFile(fileName, tempOut);
+                                                                       tempOut << outputs[groupID];
                                                                        outputs[groupID] = "";
                                                                        numOutputs[groupID] = 0;
+                                    tempOut.close();
                                                                }
                                                                
-                                                               outFile.close();
+                                                               //outFile.close();
                                                                
                                                                wroteOutPut[groupID] = true;
                                                                wroteOutPut[groupIDA] = false;
@@ -621,20 +561,33 @@ int SplitMatrix::splitDistanceLarge(){
                                        }
                                }
                        }
-                       gobble(dFile);
+                       m->gobble(dFile);
                }
                dFile.close();
-               
+        
+               vector<string> tempDistFiles;
                for (int i = 0; i < numGroups; i++) {
+            string fileName = distFile + "." + toString(i) + ".temp";
+            tempDistFiles.push_back(fileName);
+            //remove old names files just in case
+                       
                        if (numOutputs[i] > 0) {
-                               string fileName = distFile + "." + toString(i) + ".temp";
+                ofstream outFile;
                                outFile.open(fileName.c_str(), ios::app);
                                outFile << outputs[i];
                                outFile.close();
                        }
                }
-
-               splitNames(groups);
+        
+        map<string, int> seqGroup;
+        for (int i = 0; i < groups.size(); i++) {
+            for (set<string>::iterator itNames = groups[i].begin(); itNames != groups[i].end();) {
+                seqGroup[*itNames] = i;
+                groups[i].erase(itNames++);
+            }
+        }
+        
+               splitNames(seqGroup, numGroups, tempDistFiles);
                                
                return 0;                       
        }
@@ -644,73 +597,104 @@ int SplitMatrix::splitDistanceLarge(){
        }
 }
 //********************************************************************************************************************
-int SplitMatrix::splitNames(vector<set<string> >& groups){
+int SplitMatrix::splitNames(map<string, int>& seqGroup, int numGroups, vector<string>& tempDistFiles){
        try {
-               int numGroups = groups.size();
-       
-               ifstream bigNameFile(namefile.c_str());
-               if(!bigNameFile){
-                       cerr << "Error: We can't open the name file\n";
-                       exit(1);
-               }
-               
-               map<string, string> nameMap;
-               string name, nameList;
-               while(bigNameFile){
-                       bigNameFile >> name >> nameList;
-                       nameMap[name] = nameList;
-                       gobble(bigNameFile);
-               }
-               bigNameFile.close();
-                       
-               for(int i=0;i<numGroups;i++){  //parse names file to match distance files
-                       int numSeqsInGroup = groups[i].size();
-                       
-                       if(numSeqsInGroup > 0){
-                               string fileName = namefile + "." + toString(i) + ".temp";
-                               ofstream smallNameFile(fileName.c_str(), ios::ate);
-                               
-                               for(set<string>::iterator gIt=groups[i].begin();gIt!=groups[i].end();gIt++){
-                                       map<string,string>::iterator nIt = nameMap.find(*gIt);
-                                       if (nIt != nameMap.end()) {
-                                               smallNameFile << nIt->first << '\t' << nIt->second << endl;
-                                               nameMap.erase(nIt);
-                                       }else{
-                                               m->mothurOut((*gIt) + " is in your distance file and not in your namefile.  Please correct."); m->mothurOutEndLine(); exit(1);
-                                       }
-                               }
-                               smallNameFile.close();
-                       }
-               }
-               
-               //names of singletons
-               if (nameMap.size() != 0) {
-                       singleton = namefile + ".extra.temp";
-                       ofstream remainingNames(singleton.c_str(), ios::ate);
-                       for(map<string,string>::iterator nIt=nameMap.begin();nIt!=nameMap.end();nIt++){
-                               remainingNames << nIt->first << '\t' << nIt->second << endl;
-                       }
-                       remainingNames.close();
-               }else { singleton = "none"; }
-                       
+        ofstream outFile;
+        map<string, int>::iterator it;
+        
+        string inputFile = namefile;
+        if (countfile != "") { inputFile = countfile; }
+        
+        for(int i=0;i<numGroups;i++){  m->mothurRemove((inputFile + "." + toString(i) + ".temp")); }
+
+        singleton = inputFile + ".extra.temp";
+        ofstream remainingNames;
+        m->openOutputFile(singleton, remainingNames);
+        
+        bool wroteExtra = false;
+        
+        ifstream bigNameFile;
+        m->openInputFile(inputFile, bigNameFile);
+        
+        //grab header line 
+        string headers = "";
+        if (countfile != "") { headers = m->getline(bigNameFile); m->gobble(bigNameFile); }
+        
+        string name, nameList;
+        while(!bigNameFile.eof()){
+            bigNameFile >> name >> nameList;  
+            m->getline(bigNameFile); m->gobble(bigNameFile); //extra getline is for rest of countfile line if groups are given.
+            
+            //did this sequence get assigned a group
+            it = seqGroup.find(name);
+            
+            if (it != seqGroup.end()) {  
+                m->openOutputFileAppend((inputFile + "." + toString(it->second) + ".temp"), outFile);
+                outFile << name << '\t' << nameList << endl;
+                outFile.close();
+            }else{
+                wroteExtra = true;
+                remainingNames << name << '\t' << nameList << endl;
+            }
+        }
+        bigNameFile.close();
+        
                for(int i=0;i<numGroups;i++){
-                       if(groups[i].size() > 0){
-                               string tempNameFile = namefile + "." + toString(i) + ".temp";
-                               string tempDistFile = distFile + "." + toString(i) + ".temp";
-                               
+                       string tempNameFile = inputFile + "." + toString(i) + ".temp";
+                       string tempDistFile = tempDistFiles[i];
+            
+            //if there are valid distances
+            ifstream fileHandle;
+            fileHandle.open(tempDistFile.c_str());
+            if(fileHandle)     {       
+                m->gobble(fileHandle);
+                if (!fileHandle.eof()) {  //check
                                map<string, string> temp;
+                if (countfile != "") {
+                    //add header
+                    ofstream out;
+                    string newtempNameFile = tempNameFile + "2";
+                    m->openOutputFile(newtempNameFile, out);
+                    out << "Representative_Sequence\ttotal" << endl;
+                    out.close();
+                    m->appendFiles(tempNameFile, newtempNameFile);
+                    m->mothurRemove(tempNameFile);
+                    m->renameFile(newtempNameFile, tempNameFile);
+                }
                                temp[tempDistFile] = tempNameFile;
                                dists.push_back(temp);
+                       }else{
+                               ifstream in;
+                               m->openInputFile(tempNameFile, in);
+                               
+                               while(!in.eof()) { 
+                                       in >> name >> nameList;  m->gobble(in);
+                                       wroteExtra = true;
+                                       remainingNames << name << '\t' << nameList << endl;
+                               }
+                               in.close();
+                               m->mothurRemove(tempNameFile);
                        }
+            }
+            fileHandle.close();
                }
                
-               if (m->control_pressed)  {  
-                       for (int i = 0; i < dists.size(); i++) { 
-                               remove((dists[i].begin()->first).c_str());
-                               remove((dists[i].begin()->second).c_str());
-                       }
-                       dists.clear();
-               }
+               remainingNames.close();
+               
+               if (!wroteExtra) { 
+                       m->mothurRemove(singleton);
+                       singleton = "none";
+               }else if (countfile != "") {
+            //add header
+            ofstream out;
+            string newtempNameFile = singleton + "2";
+            m->openOutputFile(newtempNameFile, out);
+            out << "Representative_Sequence\ttotal" << endl; 
+            out.close();
+            m->appendFiles(singleton, newtempNameFile);
+            m->mothurRemove(singleton);
+            m->renameFile(newtempNameFile, singleton);
+        }
                
                return 0;
        }
@@ -728,7 +712,7 @@ int SplitMatrix::splitDistanceRAM(){
                int numGroups = 0;
 
                ifstream dFile;
-               openInputFile(distFile, dFile);
+               m->openInputFile(distFile, dFile);
 
                while(dFile){
                        string seqA, seqB;
@@ -736,7 +720,7 @@ int SplitMatrix::splitDistanceRAM(){
 
                        dFile >> seqA >> seqB >> dist;
                        
-                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
+                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  m->mothurRemove((distFile + "." + toString(i) + ".temp")); }  } return 0; }
                                        
                        if(dist < cutoff){
                                //cout << "in cutoff: " << dist << endl;
@@ -811,21 +795,31 @@ int SplitMatrix::splitDistanceRAM(){
                                        }
                                }
                        }
-                       gobble(dFile);
+                       m->gobble(dFile);
                }
                dFile.close();
                
+        vector<string> tempDistFiles;
                for (int i = 0; i < numGroups; i++) {
+            string fileName = distFile + "." + toString(i) + ".temp";
+            tempDistFiles.push_back(fileName);
                        if (outputs[i] != "") {
                                ofstream outFile;
-                               string fileName = distFile + "." + toString(i) + ".temp";
                                outFile.open(fileName.c_str(), ios::ate);
                                outFile << outputs[i];
                                outFile.close();
                        }
                }
-
-               splitNames(groups);
+        
+        map<string, int> seqGroup;
+        for (int i = 0; i < groups.size(); i++) {
+            for (set<string>::iterator itNames = groups[i].begin(); itNames != groups[i].end();) {
+                seqGroup[*itNames] = i;
+                groups[i].erase(itNames++);
+            }
+        }
+        
+               splitNames(seqGroup, numGroups, tempDistFiles);
                                
                return 0;                       
        }