git.donarmstrong.com Git - mothur.git/commitdiff
added fasta method to cluster.split
author westcott <westcott>
Mon, 14 Jun 2010 14:30:17 +0000 (14:30 +0000)
committer westcott <westcott>
Mon, 14 Jun 2010 14:30:17 +0000 (14:30 +0000)
clustersplitcommand.cpp
splitmatrix.cpp
splitmatrix.h

index f6bf105cf915710947493d06b8d6461f309e9212..8d21d1a83e1eff3d518861273dff976650d8c56c 100644 (file)
@@ -113,7 +113,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        fastafile = validParameter.validFile(parameters, "fasta", true);
                        if (fastafile == "not open") { abort = true; }  
                        else if (fastafile == "not found") { fastafile = ""; }
-                       else { splitmethod = "fasta";  }
+                       else { distfile = fastafile;  splitmethod = "fasta";  }
                        
                        taxFile = validParameter.validFile(parameters, "taxonomy", true);
                        if (taxFile == "not open") { abort = true; }    
@@ -271,10 +271,10 @@ int ClusterSplitCommand::execute(){
                
                //split matrix into non-overlapping groups
                SplitMatrix* split;
-               if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                       }
-               else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                       }
-               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors);         }
-               else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; }
+               if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
+               else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
+               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors, outputDir);      }
+               else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
                
                split->split();
                
index 9e53c51a8f1b6fbba8e755279e68993fb7f03fa5..6e40e97a10b0fd2af75b09c97ae43a26095016b8 100644 (file)
@@ -24,7 +24,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri
 }
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p, string output){
        m = MothurOut::getInstance();
        fastafile = ffile;
        namefile = name;
@@ -32,6 +32,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string
        cutoff = c;
        method = t;
        processors = p;
+       outputDir = output;
 }
 
 /***********************************************************************/
@@ -163,7 +164,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                                        query.printSequence(outFile); 
                                        outFile.close();
                                        
-                                       copyGroups.erase(it);
+                                       copyGroups.erase(query.getName());
                                }
                        }
                }
@@ -180,7 +181,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
-                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors);
+                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(cutoff);
                        
                        Command* command = new DistanceCommand(options);
                        command->execute();
@@ -219,30 +220,41 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                }
                bigNameFile.close();
                
-               remainingNames.close();
-               if (!wroteExtra) { 
-                       remove(singleton.c_str());
-                       singleton = "none";
-               }
-
                for(int i=0;i<numGroups;i++){
                        string tempNameFile = namefile + "." + toString(i) + ".temp";
-                       string tempDistFile = getRootName(getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+                       string tempDistFile = outputDir + getRootName(getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
 
                        //if there are valid distances
                        ifstream fileHandle;
                        fileHandle.open(tempDistFile.c_str());
                        if(fileHandle)  {       
                                gobble(fileHandle);
-                               if (!fileHandle.eof()) {  //check for blank file
+                               if (!fileHandle.eof()) {  //check for blank file - this could occur if all dists in group are above cutoff
                                        map<string, string> temp;
                                        temp[tempDistFile] = tempNameFile;
                                        dists.push_back(temp);
+                               }else {
+                                       ifstream in;
+                                       openInputFile(tempNameFile, in);
+                               
+                                       while(!in.eof()) { 
+                                               in >> name >> nameList;  gobble(in);
+                                               wroteExtra = true;
+                                               remainingNames << name << '\t' << nameList << endl;
+                                       }
+                                       in.close();
+                                       remove(tempNameFile.c_str());
                                }
                        }
                        fileHandle.close();
                }
                
+               remainingNames.close();
+               if (!wroteExtra) { 
+                       remove(singleton.c_str());
+                       singleton = "none";
+               }
+
                if (m->control_pressed)  {  for (int i = 0; i < dists.size(); i++) { remove((dists[i].begin()->first).c_str()); remove((dists[i].begin()->second).c_str()); } dists.clear(); }
                
                return 0;
index b98bdfb4b56b952d9ca0a2c7a784c22f02671e94..acaa8c5b711a220e76e48be2fc983122514d6b32 100644 (file)
@@ -20,7 +20,7 @@ class SplitMatrix  {
        public:
 
                SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method, large
-               SplitMatrix(string, string, string, float, string, int); //fastafile, namefile, taxFile, cutoff, method, processors
+               SplitMatrix(string, string, string, float, string, int, string); //fastafile, namefile, taxFile, cutoff, method, processors, outputDir
                
                ~SplitMatrix();
                int split();
@@ -30,7 +30,7 @@ class SplitMatrix  {
        private:
                MothurOut* m;
 
-               string distFile, namefile, singleton, method, taxFile, fastafile;
+               string distFile, namefile, singleton, method, taxFile, fastafile, outputDir;
                vector< map< string, string> > dists;
                float cutoff;
                bool large;