git.donarmstrong.com Git - mothur.git/blobdiff - subsamplecommand.cpp
added headers to shared and relabund files
[mothur.git] / subsamplecommand.cpp
index 5bc49d6503e8a3f24b35933129cb36a11cf08587..0f7bdec57cc94e1b721c8cee65c57eb01a5e6124 100644 (file)
@@ -53,7 +53,7 @@ string SubSampleCommand::getHelpString(){
                helpString += "Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n";
                helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
                helpString += "The sub.sample command outputs a .subsample file.\n";
-               helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n";
+               helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
                return helpString;
        }
        catch(exception& e) {
@@ -88,6 +88,7 @@ SubSampleCommand::SubSampleCommand(string option) {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
@@ -356,13 +357,6 @@ int SubSampleCommand::getSubSampleFasta() {
                
                if (m->control_pressed) { return 0; }
                
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "subsample" + m->getExtension(fastafile);
-               
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
                //make sure that if your picked groups size is not too big
                int thisSize = names.size();
@@ -375,13 +369,14 @@ int SubSampleCommand::getSubSampleFasta() {
                                        if (thisSize < size) {  size = thisSize;        }
                                }
                        }else { //make sure size is not too large
-                               int smallestSize = groupMap->getNumSeqs(Groups[0]);
-                               for (int i = 1; i < Groups.size(); i++) {
+                               vector<string> newGroups;
+                               for (int i = 0; i < Groups.size(); i++) {
                                        int thisSize = groupMap->getNumSeqs(Groups[i]);
                                        
-                                       if (thisSize < smallestSize) {  smallestSize = thisSize;        }
+                                       if (thisSize >= size) { newGroups.push_back(Groups[i]); }
+                                       else {  m->mothurOut("You have selected a size that is larger than " + Groups[i] + " number of sequences, removing " + Groups[i] + "."); m->mothurOutEndLine(); }
                                }
-                               if (smallestSize < size) { size = smallestSize; m->mothurOut("You have selected a size that is larger than your smallest sample, using your samllest sample size, " + toString(smallestSize) + "."); m->mothurOutEndLine(); }
+                               Groups = newGroups;
                        }
                        
                        m->mothurOut("Sampling " + toString(size) + " from each group."); m->mothurOutEndLine();                        
@@ -477,6 +472,17 @@ int SubSampleCommand::getSubSampleFasta() {
                                }
                        }       
                }
+               
+               if (subset.size() == 0) {  m->mothurOut("The size you selected is too large, skipping fasta file."); m->mothurOutEndLine();  return 0; }
+               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "subsample" + m->getExtension(fastafile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
                //read through fasta file outputting only the names on the subsample list
                ifstream in;
                m->openInputFile(fastafile, in);
@@ -644,14 +650,6 @@ int SubSampleCommand::readNames() {
 int SubSampleCommand::getSubSampleShared() {
        try {
                
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile);
-               
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
-               
                InputData* input = new InputData(sharedfile, "sharedfile");
                vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
                string lastLabel = lookup[0]->getLabel();
@@ -667,9 +665,34 @@ int SubSampleCommand::getSubSampleShared() {
                                
                                if (thisSize < size) {  size = thisSize;        }
                        }
+               }else {
+                       m->Groups.clear();
+                       vector<SharedRAbundVector*> temp;
+                       for (int i = 0; i < lookup.size(); i++) {
+                               if (lookup[i]->getNumSeqs() < size) { 
+                                       m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                                       delete lookup[i];
+                               }else { 
+                                       m->Groups.push_back(lookup[i]->getGroup()); 
+                                       temp.push_back(lookup[i]);
+                               }
+                       } 
+                       lookup = temp;
+                       Groups = m->Groups;
                }
                
-               m->mothurOut("Sampling " + toString(size) + " from " + toString(lookup[0]->getNumSeqs()) + "."); m->mothurOutEndLine();
+               if (lookup.size() == 0) {  m->mothurOut("The size you selected is too large, skipping shared file."); m->mothurOutEndLine(); delete input; return 0; }
+               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
+               
+               m->mothurOut("Sampling " + toString(size) + " from each group."); m->mothurOutEndLine();
                
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
@@ -679,6 +702,7 @@ int SubSampleCommand::getSubSampleShared() {
                                
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                                
+                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
                                processShared(lookup, out);
                                
                                processedLabels.insert(lookup[0]->getLabel());
@@ -693,6 +717,7 @@ int SubSampleCommand::getSubSampleShared() {
                                lookup = input->getSharedRAbundVectors(lastLabel);
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                                
+                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
                                processShared(lookup, out);
                                
                                processedLabels.insert(lookup[0]->getLabel());
@@ -733,6 +758,7 @@ int SubSampleCommand::getSubSampleShared() {
                        
                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                        
+                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
                        processShared(lookup, out);
                        
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
@@ -874,13 +900,14 @@ int SubSampleCommand::getSubSampleList() {
                                        if (thisSize < size) {  size = thisSize;        }
                                }
                        }else { //make sure size is not too large
-                               int smallestSize = groupMap->getNumSeqs(Groups[0]);
-                               for (int i = 1; i < Groups.size(); i++) {
+                               vector<string> newGroups;
+                               for (int i = 0; i < Groups.size(); i++) {
                                        int thisSize = groupMap->getNumSeqs(Groups[i]);
                                        
-                                       if (thisSize < smallestSize) {  smallestSize = thisSize;        }
+                                       if (thisSize >= size) { newGroups.push_back(Groups[i]); }
+                                       else {  m->mothurOut("You have selected a size that is larger than " + Groups[i] + " number of sequences, removing " + Groups[i] + "."); m->mothurOutEndLine(); }
                                }
-                               if (smallestSize < size) { size = smallestSize; m->mothurOut("You have selected a size that is larger than your smallest sample, using your samllest sample size, " + toString(smallestSize) + "."); m->mothurOutEndLine(); }
+                               Groups = newGroups;
                        }
                        
                        m->mothurOut("Sampling " + toString(size) + " from each group."); m->mothurOutEndLine();        
@@ -1123,7 +1150,7 @@ int SubSampleCommand::processList(ListVector*& list, ofstream& out, set<string>&
                                        individual += binnames[j];
                                }
                        }
-                       if (subset.count(individual) != 0) {  newNames += individual;  }
+                       if (subset.count(individual) != 0) {  newNames += individual + ",";  }
                        
                        
                        //if there are names in this bin add to new list
@@ -1151,15 +1178,6 @@ int SubSampleCommand::processList(ListVector*& list, ofstream& out, set<string>&
 //**********************************************************************************************************************
 int SubSampleCommand::getSubSampleRabund() {
        try {
-               
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(rabundfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "subsample" + m->getExtension(rabundfile);
-               
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               outputTypes["rabund"].push_back(outputFileName);  outputNames.push_back(outputFileName);
-               
                InputData* input = new InputData(rabundfile, "rabund");
                RAbundVector* rabund = input->getRAbundVector();
                string lastLabel = rabund->getLabel();
@@ -1170,10 +1188,18 @@ int SubSampleCommand::getSubSampleRabund() {
                
                if (size == 0) { //user has not set size, set size = 10%
                        size = int((rabund->getNumSeqs()) * 0.10);
-               }
+               }else if (size > rabund->getNumSeqs()) { m->mothurOut("The size you selected is too large, skipping rabund file."); m->mothurOutEndLine(); delete input; delete rabund; return 0; }
                
                m->mothurOut("Sampling " + toString(size) + " from " + toString(rabund->getNumSeqs()) + "."); m->mothurOutEndLine();
                
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(rabundfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "subsample" + m->getExtension(rabundfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["rabund"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
                //as long as you are not at the end of the file or done wih the lines you want
                while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) {  delete input; delete rabund; out.close(); return 0;  }
@@ -1308,15 +1334,7 @@ int SubSampleCommand::processRabund(RAbundVector*& rabund, ofstream& out) {
 //**********************************************************************************************************************
 int SubSampleCommand::getSubSampleSabund() {
        try {
-               
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(sabundfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "subsample" + m->getExtension(sabundfile);
-               
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               outputTypes["sabund"].push_back(outputFileName);  outputNames.push_back(outputFileName);
-               
+                               
                InputData* input = new InputData(sabundfile, "sabund");
                SAbundVector* sabund = input->getSAbundVector();
                string lastLabel = sabund->getLabel();
@@ -1327,10 +1345,20 @@ int SubSampleCommand::getSubSampleSabund() {
                
                if (size == 0) { //user has not set size, set size = 10%
                        size = int((sabund->getNumSeqs()) * 0.10);
-               }
+               }else if (size > sabund->getNumSeqs()) { m->mothurOut("The size you selected is too large, skipping sabund file."); m->mothurOutEndLine(); delete input; delete sabund; return 0; }
+               
                
                m->mothurOut("Sampling " + toString(size) + " from " + toString(sabund->getNumSeqs()) + "."); m->mothurOutEndLine();
                
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sabundfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "subsample" + m->getExtension(sabundfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["sabund"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
+               
                //as long as you are not at the end of the file or done wih the lines you want
                while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) {  delete input; delete sabund; out.close(); return 0;  }