git.donarmstrong.com Git - mothur.git/blobdiff - subsamplecommand.cpp
changes while testing 1.27
[mothur.git] / subsamplecommand.cpp
index d4e2c752096318748cafa69615a24627f8270aa8..8c5761d7209df23b3a8127475772e3d80b89967e 100644 (file)
@@ -10,6 +10,7 @@
 #include "subsamplecommand.h"
 #include "sharedutilities.h"
 #include "deconvolutecommand.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::setParameters(){      
@@ -62,6 +63,33 @@ string SubSampleCommand::getHelpString(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+string SubSampleCommand::getOutputFileNameTag(string type, string inputName=""){       //maps an output type (e.g. "fasta") to the tag appended to this command's output file names; returns "" when no tag is assigned
+       try {
+        string outputFileName = "";  //remains "" unless one of the branches below assigns a tag
+               map<string, vector<string> >::iterator it;
+        //inputName is only used to obtain an extension via m->getExtension (presumably including the leading '.' - TODO confirm)
+        //is this a type this command creates? i.e. is it a key in outputTypes
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {  //every type handled below currently gets the same tag: "subsample" followed by m->getExtension(inputName)
+            if (type == "fasta")            {   outputFileName =  "subsample" + m->getExtension(inputName);   }
+            else if (type == "sabund")    {   outputFileName =  "subsample" + m->getExtension(inputName);   }
+            else if (type == "name")        {   outputFileName =  "subsample" + m->getExtension(inputName);   }
+            else if (type == "group")       {   outputFileName =  "subsample" + m->getExtension(inputName);   }
+            else if (type == "list")        {   outputFileName =  "subsample" + m->getExtension(inputName);   }
+            else if (type == "rabund")       {   outputFileName =  "subsample" + m->getExtension(inputName);   }
+            else if (type == "shared") {   outputFileName =  "subsample" + m->getExtension(inputName);        }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }  //registered type with no tag defined: also flags m->control_pressed, which the unregistered-type branch above does not
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {  //report through m->errorOut, then terminate the process
+               m->errorOut(e, "SubSampleCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+
 //**********************************************************************************************************************
 SubSampleCommand::SubSampleCommand(){  
        try {
@@ -478,8 +506,7 @@ int SubSampleCommand::getSubSampleFasta() {
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "subsample" + m->getExtension(fastafile);
-               
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);         
                ofstream out;
                m->openOutputFile(outputFileName, out);
                
@@ -527,6 +554,7 @@ int SubSampleCommand::getSubSampleFasta() {
                if (namefile != "") {
                        m->mothurOut("Deconvoluting subsampled fasta file... "); m->mothurOutEndLine();
                        
+            string outputNameFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
                        //use unique.seqs to create new name and fastafile
                        string inputString = "fasta=" + outputFileName;
                        m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
@@ -541,10 +569,11 @@ int SubSampleCommand::getSubSampleFasta() {
                        delete uniqueCommand;
                        m->mothurCalling = false;
             
-                       outputTypes["name"].push_back(filenames["name"][0]);  outputNames.push_back(filenames["name"][0]);
-                       m->mothurRemove(outputFileName);
-                       outputFileName = filenames["fasta"][0];
-                       
+            m->renameFile(filenames["name"][0], outputNameFileName); 
+            m->renameFile(filenames["fasta"][0], outputFileName);  
+            
+                       outputTypes["name"].push_back(outputNameFileName);  outputNames.push_back(outputNameFileName);
+
                        m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                        
                        m->mothurOut("Done."); m->mothurOutEndLine();
@@ -557,7 +586,7 @@ int SubSampleCommand::getSubSampleFasta() {
                        
                        string groupOutputDir = outputDir;
                        if (outputDir == "") {  groupOutputDir += m->hasPath(groupfile);  }
-                       string groupOutputFileName = groupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "subsample" + m->getExtension(groupfile);
+                       string groupOutputFileName = groupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
                        
                        ofstream outGroup;
                        m->openOutputFile(groupOutputFileName, outGroup);
@@ -638,34 +667,13 @@ int SubSampleCommand::getNames() {
 int SubSampleCommand::readNames() {
        try {
                
-               ifstream in;
-               m->openInputFile(namefile, in);
-               
-               string thisname, repnames;
-               map<string, vector<string> >::iterator it;
-               
-               while(!in.eof()){
-                       
-                       if (m->control_pressed) { in.close(); return 0; }
-                       
-                       in >> thisname;         m->gobble(in);          //read from first column
-                       in >> repnames;                 //read from second column
-                       
-                       it = nameMap.find(thisname);
-                       if (it == nameMap.end()) {
-                               
-                               vector<string> splitRepNames;
-                               m->splitAtComma(repnames, splitRepNames);
-                               
-                               nameMap[thisname] = splitRepNames;      
-                               for (int i = 0; i < splitRepNames.size(); i++) { names.push_back(splitRepNames[i]); }
-                               
-                       }else{  m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine();  }
-                       
-                       m->gobble(in);
-               }
-               in.close();     
-               
+        nameMap.clear();
+        m->readNames(namefile, nameMap);
+        
+        //save names of all sequences
+        map<string, vector<string> >::iterator it;
+        for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } }
+        
                return 0;
                
        }
@@ -800,69 +808,28 @@ int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + ".subsample" + m->getExtension(sharedfile);
-               
-               
-               ofstream out;
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + thislookup[0]->getLabel() + "." +getOutputFileNameTag("shared", sharedfile);        
+        SubSample sample;
+        vector<string> subsampledLabels = sample.getSample(thislookup, size);
+        
+        if (m->control_pressed) {  return 0; }
+        
+        ofstream out;
                m->openOutputFile(outputFileName, out);
                outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
-               int numBins = thislookup[0]->getNumBins();
-               for (int i = 0; i < thislookup.size(); i++) {           
-                       int thisSize = thislookup[i]->getNumSeqs();
-                       
-                       if (thisSize != size) {
-                               
-                               string thisgroup = thislookup[i]->getGroup();
-                               
-                               OrderVector* order = new OrderVector();
-                               for(int p=0;p<numBins;p++){
-                                       for(int j=0;j<thislookup[i]->getAbundance(p);j++){
-                                               order->push_back(p);
-                                       }
-                               }
-                               random_shuffle(order->begin(), order->end());
-                               
-                               SharedRAbundVector* temp = new SharedRAbundVector(numBins);
-                               temp->setLabel(thislookup[i]->getLabel());
-                               temp->setGroup(thislookup[i]->getGroup());
-                               
-                               delete thislookup[i];
-                               thislookup[i] = temp;
-                               
-                               
-                               for (int j = 0; j < size; j++) {
-                                       
-                                       if (m->control_pressed) { delete order; out.close(); return 0; }
-                                       
-                                       //get random number to sample from order between 0 and thisSize-1.
-                                       //don't need this because of the random shuffle above
-                                       //int myrand = int((float)(thisSize) * (float)(rand()) / ((float)RAND_MAX+1.0));
-                                       
-                                       int bin = order->get(j);
-                                       
-                                       int abund = thislookup[i]->getAbundance(bin);
-                                       thislookup[i]->set(bin, (abund+1), thisgroup);
-                               }       
-                               delete order;
-                       }
-               }
-               
-               //subsampling may have created some otus with no sequences in them
-               eliminateZeroOTUS(thislookup);
-               
-               if (m->control_pressed) { out.close(); return 0; }
-               
+        m->currentBinLabels = subsampledLabels;
+        
                thislookup[0]->printHeaders(out);
                
                for (int i = 0; i < thislookup.size(); i++) {
                        out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
                        thislookup[i]->print(out);
                }
-               
                out.close();
-               
-               //save mothurOut's binLabels to restore for next label
+        
+        
+        //save mothurOut's binLabels to restore for next label
                m->currentBinLabels = saveBinLabels;
                
                return 0;
@@ -879,8 +846,7 @@ int SubSampleCommand::getSubSampleList() {
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "subsample" + m->getExtension(listfile);
-               
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);            
                ofstream out;
                m->openOutputFile(outputFileName, out);
                outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
@@ -1224,8 +1190,7 @@ int SubSampleCommand::getSubSampleRabund() {
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(rabundfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "subsample" + m->getExtension(rabundfile);
-               
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + getOutputFileNameTag("rabund", rabundfile);              
                ofstream out;
                m->openOutputFile(outputFileName, out);
                outputTypes["rabund"].push_back(outputFileName);  outputNames.push_back(outputFileName);
@@ -1379,8 +1344,7 @@ int SubSampleCommand::getSubSampleSabund() {
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(sabundfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "subsample" + m->getExtension(sabundfile);
-               
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + getOutputFileNameTag("sabund", sabundfile);              
                ofstream out;
                m->openOutputFile(outputFileName, out);
                outputTypes["sabund"].push_back(outputFileName);  outputNames.push_back(outputFileName);
@@ -1523,64 +1487,6 @@ int SubSampleCommand::processSabund(SAbundVector*& sabund, ofstream& out) {
        }
 }                      
 //**********************************************************************************************************************
-int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
-       try {
-               
-               vector<SharedRAbundVector*> newLookup;
-               for (int i = 0; i < thislookup.size(); i++) {
-                       SharedRAbundVector* temp = new SharedRAbundVector();
-                       temp->setLabel(thislookup[i]->getLabel());
-                       temp->setGroup(thislookup[i]->getGroup());
-                       newLookup.push_back(temp);
-               }
-               
-               //for each bin
-               vector<string> newBinLabels;
-               string snumBins = toString(thislookup[0]->getNumBins());
-               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
-                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-                       
-                       //look at each sharedRabund and make sure they are not all zero
-                       bool allZero = true;
-                       for (int j = 0; j < thislookup.size(); j++) {
-                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
-                       }
-                       
-                       //if they are not all zero add this bin
-                       if (!allZero) {
-                               for (int j = 0; j < thislookup.size(); j++) {
-                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
-                               }
-                               //if there is a bin label use it otherwise make one
-                               string binLabel = "Otu";
-                               string sbinNumber = toString(i+1);
-                               if (sbinNumber.length() < snumBins.length()) { 
-                                       int diff = snumBins.length() - sbinNumber.length();
-                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
-                               }
-                               binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
-                               
-                               newBinLabels.push_back(binLabel);
-                       }
-               }
-               
-               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-               thislookup.clear();
-               
-               thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
-               
-               return 0;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************