git.donarmstrong.com Git - mothur.git/blobdiff - splitmatrix.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / splitmatrix.cpp
index dcc5b50ef8b396d9186c755c3ba949f9c789cf64..384b09af1bb94be09c5607c8b863b28b16215731 100644 (file)
@@ -10,6 +10,7 @@
 #include "splitmatrix.h"
 #include "phylotree.h"
 #include "distancecommand.h"
+#include "seqsummarycommand.h"
 
 /***********************************************************************/
 
@@ -24,7 +25,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri
 }
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, bool cl, string output){
        m = MothurOut::getInstance();
        fastafile = ffile;
        namefile = name;
@@ -33,6 +34,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float c
        distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that
        method = t;
        processors = p;
+    classic = cl;
        outputDir = output;
 }
 
@@ -65,6 +67,8 @@ int SplitMatrix::splitDistance(){
         
                if (large)      { splitDistanceLarge(); }
                else            { splitDistanceRAM();   }
+               
+               return 0;
                        
        }
        catch(exception& e) {
@@ -77,7 +81,7 @@ int SplitMatrix::splitDistance(){
 int SplitMatrix::splitClassify(){
        try {
                cutoff = int(cutoff);
-               
+                               
                map<string, int> seqGroup;
                map<string, int>::iterator it;
                map<string, int>::iterator it2;
@@ -87,16 +91,13 @@ int SplitMatrix::splitClassify(){
                //build tree from users taxonomy file
                PhyloTree* phylo = new PhyloTree();
                
-               ifstream in;
-               m->openInputFile(taxFile, in);
-                       
-               //read in users taxonomy file and add sequences to tree
-               string seqname, tax;
-               while(!in.eof()){
-                       in >> seqname >> tax; m->gobble(in);
-                       phylo->addSeqToTree(seqname, tax);
-               }
-               in.close();
+        map<string, string> temp;
+        m->readTax(taxFile, temp);
+        
+        for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+            phylo->addSeqToTree(itTemp->first, itTemp->second);
+            temp.erase(itTemp++);
+        }
                
                phylo->assignHeirarchyIDs(0);
 
@@ -143,7 +144,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                set<string> names;
                                
                for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
-                       remove((fastafile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
                }
                        
                ifstream in;
@@ -182,18 +183,22 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
-                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
+                       string options = "";
+            if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+            else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
+                       if (outputDir != "") { options += ", outputdir=" + outputDir; }
                        
                        Command* command = new DistanceCommand(options);
+                       
                        command->execute();
                        delete command;
                        
-                       remove((fastafile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((fastafile + "." + toString(i) + ".temp"));
                        
                        //remove old names files just in case
-                       remove((namefile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((namefile + "." + toString(i) + ".temp"));
                }
-               
+                       
                singleton = namefile + ".extra.temp";
                ofstream remainingNames;
                m->openOutputFile(singleton, remainingNames);
@@ -224,7 +229,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                for(int i=0;i<numGroups;i++){
                        string tempNameFile = namefile + "." + toString(i) + ".temp";
                        if (outputDir == "") { outputDir = m->hasPath(fastafile); }
-                       string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+                       string tempDistFile = "";
+            if (classic) { tempDistFile =  outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+            else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
 
                        //if there are valid distances
                        ifstream fileHandle;
@@ -245,7 +252,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                                                remainingNames << name << '\t' << nameList << endl;
                                        }
                                        in.close();
-                                       remove(tempNameFile.c_str());
+                                       m->mothurRemove(tempNameFile);
                                }
                        }
                        fileHandle.close();
@@ -253,11 +260,11 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                
                remainingNames.close();
                if (!wroteExtra) { 
-                       remove(singleton.c_str());
+                       m->mothurRemove(singleton);
                        singleton = "none";
                }
 
-               if (m->control_pressed)  {  for (int i = 0; i < dists.size(); i++) { remove((dists[i].begin()->first).c_str()); remove((dists[i].begin()->second).c_str()); } dists.clear(); }
+               if (m->control_pressed)  {  for (int i = 0; i < dists.size(); i++) { m->mothurRemove((dists[i].begin()->first)); m->mothurRemove((dists[i].begin()->second)); } dists.clear(); }
                
                return 0;
        }
@@ -277,7 +284,7 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                ofstream outFile;
                
                for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
-                       remove((distFile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((distFile + "." + toString(i) + ".temp"));
                }
                
                //for buffering the io to improve speed
@@ -294,7 +301,7 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                        string seqA, seqB;
                        float dist;
                        
-                       if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str());        } }
+                       if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { m->mothurRemove((distFile + "." + toString(i) + ".temp"));       } }
                        
                        dFile >> seqA >> seqB >> dist;  m->gobble(dFile);
                        
@@ -321,7 +328,7 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                dFile.close();
        
                for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
-                       remove((namefile + "." + toString(i) + ".temp").c_str());
+                       m->mothurRemove((namefile + "." + toString(i) + ".temp"));
                        
                        //write out any remaining buffers
                        if (numOutputs[i] > 0) {
@@ -380,21 +387,21 @@ int SplitMatrix::splitDistanceFileByTax(map<string, int>& seqGroup, int numGroup
                                        remainingNames << name << '\t' << nameList << endl;
                                }
                                in.close();
-                               remove(tempNameFile.c_str());
+                               m->mothurRemove(tempNameFile);
                        }
                }
                
                remainingNames.close();
                
                if (!wroteExtra) { 
-                       remove(singleton.c_str());
+                       m->mothurRemove(singleton);
                        singleton = "none";
                }
 
                if (m->control_pressed)  {  
                        for (int i = 0; i < dists.size(); i++) { 
-                               remove((dists[i].begin()->first).c_str());
-                               remove((dists[i].begin()->second).c_str());
+                               m->mothurRemove((dists[i].begin()->first));
+                               m->mothurRemove((dists[i].begin()->second));
                        }
                        dists.clear();
                }
@@ -429,7 +436,7 @@ int SplitMatrix::splitDistanceLarge(){
 
                        dFile >> seqA >> seqB >> dist;
                        
-                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
+                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  m->mothurRemove((distFile + "." + toString(i) + ".temp")); }  } return 0; }
                                        
                        if(dist < cutoff){
                                //cout << "in cutoff: " << dist << endl;
@@ -557,7 +564,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                                delete memblock;
                                                                
                                                                fileB.close();
-                                                               remove(fileName2.c_str());
+                                                               m->mothurRemove(fileName2);
                                                                
                                                                //write out the merged memory
                                                                if (numOutputs[groupID] > 60) {
@@ -617,7 +624,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                                delete memblock;
                                                                
                                                                fileB.close();
-                                                               remove(fileName2.c_str());
+                                                               m->mothurRemove(fileName2);
                                                                
                                                                //write out the merged memory
                                                                if (numOutputs[groupID] > 60) {
@@ -720,8 +727,8 @@ int SplitMatrix::splitNames(vector<set<string> >& groups){
                
                if (m->control_pressed)  {  
                        for (int i = 0; i < dists.size(); i++) { 
-                               remove((dists[i].begin()->first).c_str());
-                               remove((dists[i].begin()->second).c_str());
+                               m->mothurRemove((dists[i].begin()->first));
+                               m->mothurRemove((dists[i].begin()->second));
                        }
                        dists.clear();
                }
@@ -750,7 +757,7 @@ int SplitMatrix::splitDistanceRAM(){
 
                        dFile >> seqA >> seqB >> dist;
                        
-                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
+                       if (m->control_pressed) {   dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  m->mothurRemove((distFile + "." + toString(i) + ".temp")); }  } return 0; }
                                        
                        if(dist < cutoff){
                                //cout << "in cutoff: " << dist << endl;