]> git.donarmstrong.com Git - mothur.git/commitdiff
added count file to get.groups and remove.groups. added shortcut parameter to classif...
authorSarah Westcott <mothur.westcott@gmail.com>
Tue, 11 Sep 2012 19:02:46 +0000 (15:02 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Tue, 11 Sep 2012 19:02:46 +0000 (15:02 -0400)
18 files changed:
bayesian.cpp
bayesian.h
classify.cpp
classify.h
classifyseqscommand.cpp
classifyseqscommand.h
counttable.cpp
getgroupscommand.cpp
getgroupscommand.h
knn.cpp
mgclustercommand.cpp
mothurout.cpp
mothurout.h
phylotree.cpp
removegroupscommand.cpp
removegroupscommand.h
sharedcommand.cpp
sharedutilities.cpp

index 1dc38337aef1bcc3b695ff56e86061cdab58c13d..bccf0ce0dda18501c129b7604546f4e9fc43382b 100644 (file)
 #include "phylosummary.h"
 #include "referencedb.h"
 /**************************************************************************************************/
-Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f) : 
+Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f, bool sh) : 
 Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
        try {
                ReferenceDB* rdb = ReferenceDB::getInstance();
                
                threadID = tid;
                flip = f;
+        shortcuts = sh;
                string baseName = tempFile;
                        
                if (baseName == "saved") { baseName = rdb->getSavedReference(); }
@@ -63,7 +64,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                        }
                        saveIn.close();                 
                }
-               
+FilesGood = false;
                if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){       
                        if (tempFile == "saved") { m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");     m->mothurOutEndLine(); }
                        
@@ -113,7 +114,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                WordPairDiffArr.resize(numKmers);
                        
                                for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
-                    ofstream out;
+                ofstream out;
                                ofstream out2;
                                
                                #ifdef USE_MPI
@@ -124,17 +125,19 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                #endif
 
                                
-                               m->openOutputFile(probFileName, out);
+                if (shortcuts) { 
+                    m->openOutputFile(probFileName, out); 
                                
-                               //output mothur version
-                               out << "#" << m->getVersion() << endl;
+                    //output mothur version
+                    out << "#" << m->getVersion() << endl;
                                
-                               out << numKmers << endl;
+                    out << numKmers << endl;
                                
-                               m->openOutputFile(probFileName2, out2);
+                    m->openOutputFile(probFileName2, out2);
                                
-                               //output mothur version
-                               out2 << "#" << m->getVersion() << endl;
+                    //output mothur version
+                    out2 << "#" << m->getVersion() << endl;
+                }
                                
                                #ifdef USE_MPI
                                        }
@@ -151,7 +154,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                                if (pid == 0) {  
                                        #endif
 
-                                       out << i << '\t';
+                    if (shortcuts) {  out << i << '\t'; }
                                        
                                        #ifdef USE_MPI
                                                }
@@ -188,7 +191,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                                                if (pid == 0) {  
                                                        #endif
 
-                                                       out << k << '\t' << wordGenusProb[i][k] << '\t' ; 
+                            if (shortcuts) { out << k << '\t' << wordGenusProb[i][k] << '\t' ; }
                                                        
                                                        #ifdef USE_MPI
                                                                }
@@ -204,8 +207,10 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                                if (pid == 0) {  
                                        #endif
                                        
-                                       out << endl;
-                                       out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl;
+                            if (shortcuts) { 
+                                out << endl;
+                                out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl;
+                            }
                                        
                                        #ifdef USE_MPI
                                                }
@@ -218,9 +223,10 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                        if (pid == 0) {  
                                #endif
                                
-                               out.close();
-                               out2.close();
-                               
+                        if (shortcuts) { 
+                            out.close();
+                            out2.close();
+                        }
                                #ifdef USE_MPI
                                        }
                                #endif
index 7c884337495dbb1e0213d1171a54ca20e0eb932c..405fee3679df7b94ae8f392f62c27886e0b11e36 100644 (file)
@@ -18,7 +18,7 @@
 class Bayesian : public Classify {
        
 public:
-       Bayesian(string, string, string, int, int, int, int, bool);
+       Bayesian(string, string, string, int, int, int, int, bool, bool);
        ~Bayesian();
        
        string getTaxonomy(Sequence*);
index 212e563f94c4ae7af3ba0916d1040afd9b0e11fa..f44e66c359873f6830dac72d99fb1727a73c27ad 100644 (file)
@@ -200,7 +200,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                }
                                fastaFile.close();
 
-                               database->generateDB();
+                if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); }
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
@@ -260,9 +261,6 @@ int Classify::readTaxonomy(string file) {
                MPI_File inMPI;
                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
                MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
-               //char* inFileName = new char[file.length()];
-               //memcpy(inFileName, file.c_str(), file.length());
                
                char inFileName[1024];
                strcpy(inFileName, file.c_str());
index 4e0354782e5063e2a52268e88a1412b40db3c28e..6582be48d12bcaee868c3d64865a665eff073c42 100644 (file)
@@ -46,7 +46,7 @@ protected:
        string taxFile, templateFile, simpleTax;
        vector<string> names;
        int threadID;
-       bool flip, flipped;
+       bool flip, flipped, shortcuts;
        
        int readTaxonomy(string);
        vector<string> parseTax(string);
index 43a021ee7d94e9d0638a4cbf2154717f8d9d3e89..bab774012f20283e12b43606349b5b519721f0b7 100644 (file)
@@ -34,6 +34,7 @@ vector<string> ClassifySeqsCommand::setParameters(){
                CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
                CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
                CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+        CommandParameter pshortcuts("shortcuts", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshortcuts);
                CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -557,6 +558,9 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "probs", false);            if (temp == "not found"){       temp = "true";                  }
                        probs = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "shortcuts", false);   if (temp == "not found"){       temp = "true";                  }
+                       writeShortcuts = m->isTrue(temp);
                        
                        //temp = validParameter.validFile(parameters, "flip", false);                   if (temp == "not found"){       temp = "T";                             }
                        //flip = m->isTrue(temp); 
@@ -601,12 +605,12 @@ int ClassifySeqsCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
-               if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);             }
+               if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);             }
                else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
                else {
                        m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
                        m->mothurOutEndLine();
-                       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);     
+                       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);     
                }
                
                if (m->control_pressed) { delete classify; return 0; }
@@ -1004,7 +1008,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                        string extension = "";
                        if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        
-                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flip);
+                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flip, writeShortcuts);
                        pDataArray.push_back(tempclass);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
index 6d43dcb9dd2067bab91f46d42ff8afa238a4a191..6d11d9236778028e326c6f4fdd4925dd6430fd50 100644 (file)
@@ -74,7 +74,7 @@ private:
        string fastaFileName, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
        int processors, kmerSize, numWanted, cutoff, iters;
        float match, misMatch, gapOpen, gapExtend;
-       bool abort, probs, save, flip, hasName, hasCount;
+       bool abort, probs, save, flip, hasName, hasCount, writeShortcuts;
        
        int driver(linePair*, string, string, string, string);
        int createProcesses(string, string, string, string); 
@@ -100,10 +100,10 @@ struct classifyData {
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
        int count, kmerSize, threadID, cutoff, iters, numWanted;
-       bool probs, flip;
+       bool probs, flip, writeShortcuts;
         
        classifyData(){}
-       classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) {
+       classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli, bool wsh) {
                accnos = acc;
                taxonomyFileName = tx;
                templateFileName = te;
@@ -127,6 +127,7 @@ struct classifyData {
                probs = p;
                count = 0;
                flip = fli;
+        writeShortcuts = wsh;
        }
 };
 
@@ -163,12 +164,12 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                
                //make classify
                Classify* myclassify;
-               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);         }
+               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);             }
                else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
                else {
                        pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
                        pDataArray->m->mothurOutEndLine();
-                       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); 
+                       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);     
                }
                
                if (pDataArray->m->control_pressed) { delete myclassify; return 0; }
index 5307beee8379d8e758b946b3847dfa1be2d5a7b2..bc9d4da05966b6b3fc3ec5fe0e751c21be7f180f 100644 (file)
@@ -447,6 +447,7 @@ int CountTable::addGroup(string groupName) {
             counts[i] = newCounts;
         }
         hasGroups = true;
+        m->setAllGroups(groups);
         
         return 0;
     }
index fe6f571669bf820d1a153ef3f44327611666b8a1..7585c122eeb3a212fab404240f5b6f6568574634 100644 (file)
@@ -18,8 +18,9 @@ vector<string> GetGroupsCommand::setParameters(){
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);         
         CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
                CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
@@ -43,7 +44,7 @@ string GetGroupsCommand::getHelpString(){
                string helpString = "";
                helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or shared file.\n";
                helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n";
-               helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file, or are using a shared file.\n";
+               helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file, or are using a shared file.\n";
                helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n";
                helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
@@ -71,6 +72,7 @@ string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
@@ -97,6 +99,7 @@ GetGroupsCommand::GetGroupsCommand(){
                outputTypes["list"] = tempOutNames;
                outputTypes["shared"] = tempOutNames;
         outputTypes["design"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
@@ -135,6 +138,7 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        outputTypes["list"] = tempOutNames;
                        outputTypes["shared"] = tempOutNames;
             outputTypes["design"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -208,6 +212,14 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["design"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        
@@ -227,11 +239,6 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        else if (namefile == "not found") {  namefile = "";  }  
                        else { m->setNameFile(namefile); }
                        
-                       groupfile = validParameter.validFile(parameters, "group", true);
-                       if (groupfile == "not open") { groupfile = ""; abort = true; }
-                       else if (groupfile == "not found") {  groupfile = "";                   }
-                       else { m->setGroupFile(groupfile); }    
-                       
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not open") { abort = true; }
                        else if (listfile == "not found") {  listfile = "";  }
@@ -263,8 +270,22 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        if (designfile == "not open") { designfile = ""; abort = true; }
                        else if (designfile == "not found") {   designfile = "";        }
                        else { m->setDesignFile(designfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                        
-                       if ((sharedfile == "") && (groupfile == "") && (designfile == "")) { 
+                       if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { 
                                //is there are current file available for any of these?
                                if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
                                        //give priority to group, then shared
@@ -274,7 +295,11 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                                                sharedfile = m->getSharedFile(); 
                                                if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
                                                else { 
-                                                       m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                                       countfile = m->getCountTableFile(); 
+                            if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                            else { 
+                                m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                            }
                                                }
                                        }
                                }else {
@@ -288,7 +313,12 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                                                        designfile = m->getDesignFile(); 
                             if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
                             else { 
-                                m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                countfile = m->getCountTableFile(); 
+                                if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                                else { 
+                                    m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                }
+
                             }
                                                }
                                        }
@@ -297,13 +327,15 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        
                        if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; }
-                       if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
-
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+                       if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "")  && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
+                       if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
+            
+            if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
                
        }
@@ -331,6 +363,7 @@ int GetGroupsCommand::execute(){
                        SharedUtil* util = new SharedUtil();
                        vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
                        util->setGroups(Groups, gNamesOfGroups);
+            m->setGroups(Groups);
                        groupMap->setNamesOfGroups(gNamesOfGroups);
                        delete util;
                        
@@ -338,7 +371,23 @@ int GetGroupsCommand::execute(){
                        fillNames();
                        
                        delete groupMap;
-               }
+               }else if (countfile != ""){
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+            CountTable ct;
+            ct.readTable(countfile);
+            if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
+                
+            vector<string> gNamesOfGroups = ct.getNamesOfGroups();
+            SharedUtil util;
+            util.setGroups(Groups, gNamesOfGroups);
+            m->setGroups(Groups);
+            for (int i = 0; i < Groups.size(); i++) {
+                vector<string> thisGroupsSeqs = ct.getNamesOfSeqs(Groups[i]);
+                for (int j = 0; j < thisGroupsSeqs.size(); j++) { names.insert(thisGroupsSeqs[j]); }
+            }
+        }
                
                if (m->control_pressed) { return 0; }
                
@@ -346,6 +395,7 @@ int GetGroupsCommand::execute(){
                if (namefile != "")                     {               readName();             }
                if (fastafile != "")            {               readFasta();    }
                if (groupfile != "")            {               readGroup();    }
+        if (countfile != "")           {               readCount();    }
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
                if (sharedfile != "")           {               readShared();   }
@@ -396,6 +446,11 @@ int GetGroupsCommand::execute(){
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
                        }
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -742,6 +797,82 @@ int GetGroupsCommand::readGroup(){
        }
 }
 //**********************************************************************************************************************
+//Writes a filtered copy of countfile: only the sequences in names and only the columns of the chosen Groups are kept.
+//Each kept row gets a total recomputed from the kept columns. Always returns 0.
+int GetGroupsCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               int selectedCount = 0;
+               
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+        
+        vector<string> groups;            //group columns in file order, the first two columns are the name and total
+        map<string, int> GroupIndexes;    //group name -> rank of the group in sorted order
+        set<int> indexOfGroupsChosen;     //sorted ranks of the groups we are keeping
+        for (int i = 2; i < columnHeaders.size(); i++) {  groups.push_back(columnHeaders[i]);  }
+        //sort groups to keep consistent with how we store the groups in groupmap
+        vector<string> sortedGroups = groups;
+        sort(sortedGroups.begin(), sortedGroups.end());
+        for (int i = 0; i < sortedGroups.size(); i++) {  GroupIndexes[sortedGroups[i]] = i; }
+        sort(Groups.begin(), Groups.end());
+        out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < Groups.size(); i++) {
+            //only list groups that are really in the file, and only once, so the header always matches the data columns
+            map<string, int>::iterator itGroup = GroupIndexes.find(Groups[i]);
+            if ((itGroup != GroupIndexes.end()) && (indexOfGroupsChosen.insert(itGroup->second).second)) { out << Groups[i] << '\t'; }
+        }
+        out << endl;
+        
+        string name; int oldTotal;
+        while (!in.eof()) {
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
+            
+            if (names.count(name) != 0) {
+                //the file's columns can be in any order, so file each count under its group's sorted rank
+                vector<int> countsByRank(groups.size(), 0); int thisTotal = 0; int temp;
+                for (int i = 0; i < groups.size(); i++) {  
+                    in >> temp;  m->gobble(in);
+                    countsByRank[GroupIndexes[groups[i]]] = temp;
+                }
+                
+                //the set hands back the ranks in ascending order, which is the order the header was written in
+                set<int>::iterator it;
+                for (it = indexOfGroupsChosen.begin(); it != indexOfGroupsChosen.end(); it++) {  thisTotal += countsByRank[*it];  }
+                out << name << '\t' << thisTotal << '\t';
+                for (it = indexOfGroupsChosen.begin(); it != indexOfGroupsChosen.end(); it++) {  out << countsByRank[*it] << '\t'; }
+                out << endl;
+                wroteSomething = true;
+                selectedCount+= thisTotal;
+            }else {  m->getline(in); }
+            m->gobble(in);
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetGroupsCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int GetGroupsCommand::readDesign(){
        try {
                string thisOutputDir = outputDir;
index 80230b49b9872ab4633dbd2d4120826b3cc1ee82..6bb608833a418f2c0854c2b8bf0ac9c16a66f790 100644 (file)
@@ -40,7 +40,7 @@ private:
        map<string, string> uniqueToRedundant; //if a namefile is given and the first column name is not selected
                                                                                   //then the other files need to change the unique name in their file to match.
                                                                                   //only add the names that need to be changed to keep the map search quick
-       string accnosfile, fastafile, namefile, groupfile, listfile, designfile, taxfile, outputDir, groups, sharedfile;
+       string accnosfile, countfile, fastafile, namefile, groupfile, listfile, designfile, taxfile, outputDir, groups, sharedfile;
        bool abort;
        vector<string> outputNames, Groups;
        GroupMap* groupMap;
@@ -48,6 +48,7 @@ private:
        int readFasta();
        int readName();
        int readGroup();
+    int readCount();
        int readList();
        int readTax();
        int fillNames();
diff --git a/knn.cpp b/knn.cpp
index 837fa6d18d4f1aed6cf34e5861942b4230c30dd1..81b21b265785c2f8a83392ee52e7aeffbc9d4370 100644 (file)
--- a/knn.cpp
+++ b/knn.cpp
@@ -14,6 +14,7 @@ Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOp
 : Classify(), num(n), search(method) {
        try {
                threadID = tid;
+        shortcuts = true;
                
                //create search database and names vector
                generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch);
index 477450475d479eb4308d9c2503086b9d7c03b051..1861aa5b5d8c19f547ed90e3ec57866a3b53ecd2 100644 (file)
@@ -13,8 +13,8 @@
 vector<string> MGClusterCommand::setParameters(){      
        try {
                CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
                CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
                CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
@@ -147,6 +147,14 @@ MGClusterCommand::MGClusterCommand(string option) {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
index 2debf843f7910e6e0d61c9092cd366f1be5b2161..d9df5a078b425b773611d25b2616e3f66a58bddf 100644 (file)
@@ -2114,6 +2114,32 @@ int MothurOut::getNumChar(string line, char c){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
+       try {
+               //returns true when every name in subset also appears in bigset
+               
+               //a subset with more entries than bigset is rejected without comparing any names
+               if (bigset.size() < subset.size()) { return false;  }
+               
+               for (int s = 0; s < subset.size(); s++) {
+                       //scan bigset for this name
+                       int b = 0;
+                       while ((b < bigset.size()) && (bigset[b] != subset[s])) { b++; }
+                       
+                       //ran off the end of bigset, so this name has no match
+                       if (b == bigset.size()) { return false; }
+               }
+               
+               //every name in subset was found
+               return true;
+        
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "isSubset");
+               exit(1);
+       }
+}
 /***********************************************************************/
 int MothurOut::mothurRemove(string filename){
        try {
index 77c5a804070eaa2dd3229375df999a5b9521dc92..33384034ae55ee7f11c72b48378fa59bfe29e16e 100644 (file)
@@ -141,6 +141,7 @@ class MothurOut {
         void splitAtChar(string&, string&, char);
                int removeConfidences(string&);
         string makeList(vector<string>&);
+        bool isSubset(vector<string>, vector<string>); //bigSet, subset
                
                //math operation
                int factorial(int num);
index 3dde18680c625eb816230a8d13774ccfc47032cf..73cb461b852c3946d62140c2d6520481cff0e937 100644 (file)
@@ -598,7 +598,7 @@ int PhyloTree::getIndex(string seqName){
        try {
                map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
        
-               if (itFind != name2Taxonomy.end()) {  return name2Taxonomy[seqName];  }
+               if (itFind != name2Taxonomy.end()) {  return itFind->second;  }
                else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
        }
        catch(exception& e) {
index 05b1170bf4501ebd3131a616fdcd58a9b49e59f2..86ddf946a9d51c1df7e43783e0470bf5aead6301 100644 (file)
@@ -18,9 +18,9 @@ vector<string> RemoveGroupsCommand::setParameters(){
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
-        CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);         CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
                CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
@@ -41,9 +41,9 @@ vector<string> RemoveGroupsCommand::setParameters(){
 string RemoveGroupsCommand::getHelpString(){   
        try {
                string helpString = "";
-               helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or sharedfile.\n";
+               helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or sharedfile.\n";
                helpString += "It outputs a file containing the sequences NOT in the those specified groups, or with a sharedfile eliminates the groups you selected.\n";
-               helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file or are using a sharedfile.\n";
+               helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n";
                helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed.  You can separate group names with dashes.\n";
                helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
@@ -71,6 +71,7 @@ string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName="
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
@@ -96,6 +97,7 @@ RemoveGroupsCommand::RemoveGroupsCommand(){
                outputTypes["list"] = tempOutNames;
                outputTypes["shared"] = tempOutNames;
         outputTypes["design"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
@@ -134,6 +136,7 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                        outputTypes["list"] = tempOutNames;
                        outputTypes["shared"] = tempOutNames;
             outputTypes["design"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -207,6 +210,14 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["design"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        
@@ -258,12 +269,22 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                        else if (sharedfile == "not found") {  sharedfile = "";  }
                        else { m->setSharedFile(sharedfile); }
                        
-                       groupfile = validParameter.validFile(parameters, "group", true);
-                       if (groupfile == "not open") { groupfile = ""; abort = true; }
-                       else if (groupfile == "not found") {    groupfile = ""; }
-                       else { m->setGroupFile(groupfile); }    
                        
-                       if ((sharedfile == "") && (groupfile == "") && (designfile == "")) { 
+                       countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
+                       
+                       if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { 
                                //is there are current file available for any of these?
                                if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
                                        //give priority to group, then shared
@@ -273,7 +294,11 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                                                sharedfile = m->getSharedFile(); 
                                                if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
                                                else { 
-                                                       m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                                       countfile = m->getCountTableFile(); 
+                            if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                            else { 
+                                m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                            }
                                                }
                                        }
                                }else {
@@ -287,7 +312,12 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                                                        designfile = m->getDesignFile(); 
                             if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
                             else { 
-                                m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                countfile = m->getCountTableFile(); 
+                                if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                                else { 
+                                    m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                }
+                                
                             }
                                                }
                                        }
@@ -296,14 +326,15 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                        
                        if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (sharedfile == "") && (designfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; }
-                       if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
-                       
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
-               
+                       if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "")  && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
+                       if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
+            
+            if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
                
        }
@@ -337,7 +368,28 @@ int RemoveGroupsCommand::execute(){
                        fillNames();
                        
                        delete groupMap;
-               }
+               }else if (countfile != ""){
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+            CountTable ct;
+            ct.readTable(countfile);
+            if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
+            
+            vector<string> gNamesOfGroups = ct.getNamesOfGroups();
+            SharedUtil util;
+            util.setGroups(Groups, gNamesOfGroups);
+            vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+            sort(Groups.begin(), Groups.end());
+            
+            for (int i = 0; i < namesOfSeqs.size(); i++) {
+                vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
+                if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
+                    names.insert(namesOfSeqs[i]);
+                }
+            }
+        }
+
                                
                if (m->control_pressed) { return 0; }
                
@@ -345,6 +397,7 @@ int RemoveGroupsCommand::execute(){
                if (namefile != "")                     {               readName();             }
                if (fastafile != "")            {               readFasta();    }
                if (groupfile != "")            {               readGroup();    }
+        if (countfile != "")           {               readCount();    }
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
                if (sharedfile != "")           {               readShared();   }
@@ -394,6 +447,11 @@ int RemoveGroupsCommand::execute(){
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
                        }
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -762,6 +820,87 @@ int RemoveGroupsCommand::readGroup(){
        }
 }
 //**********************************************************************************************************************
+//Writes a filtered copy of countfile: sequences in names are dropped and the columns of the groups in Groups are left out.
+//Each kept row gets a total recomputed from the kept columns. Always returns 0.
+int RemoveGroupsCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               int removedCount = 0;
+               
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+        
+        vector<string> groups;            //group columns in file order, the first two columns are the name and total
+        map<string, int> GroupIndexes;    //group name -> rank of the group in sorted order
+        set<int> indexOfGroupsChosen;     //sorted ranks of the groups we are keeping
+        for (int i = 2; i < columnHeaders.size(); i++) {  groups.push_back(columnHeaders[i]);  }
+        //sort groups to keep consistent with how we store the groups in groupmap
+        vector<string> sortedGroups = groups;
+        sort(sortedGroups.begin(), sortedGroups.end());
+        for (int i = 0; i < sortedGroups.size(); i++) {  GroupIndexes[sortedGroups[i]] = i; }
+
+        out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < sortedGroups.size(); i++) {
+            //a group stays in the output only if the user did not ask to remove it
+            if (!m->inUsersGroups(sortedGroups[i], Groups)) { out << sortedGroups[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[sortedGroups[i]]); }
+        }
+        out << endl;
+        
+        string name; int oldTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
+            
+            if (names.count(name) == 0) {
+                //the file's columns can be in any order, so file each count under its group's sorted rank
+                vector<int> countsByRank(groups.size(), 0); int thisTotal = 0; int temp;
+                for (int i = 0; i < groups.size(); i++) {  
+                    in >> temp;  m->gobble(in);
+                    countsByRank[GroupIndexes[groups[i]]] = temp;
+                }
+                
+                //the set hands back the ranks in ascending order, which is the order the header was written in
+                set<int>::iterator it;
+                for (it = indexOfGroupsChosen.begin(); it != indexOfGroupsChosen.end(); it++) {  thisTotal += countsByRank[*it];  }
+                out << name << '\t' << thisTotal << '\t';
+                for (it = indexOfGroupsChosen.begin(); it != indexOfGroupsChosen.end(); it++) {  out << countsByRank[*it] << '\t'; }
+                out << endl;
+                
+                wroteSomething = true;
+                removedCount+= (oldTotal - thisTotal);
+            }else {  m->getline(in); removedCount += oldTotal; }
+            
+            m->gobble(in);
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveGroupsCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int RemoveGroupsCommand::readDesign(){
        try {
                string thisOutputDir = outputDir;
index c6db38057f8c206ecbba0806486078eccdcc7278..c36998ad8bb52349d3c5d0ee73812dd8a6f50c07 100644 (file)
@@ -36,7 +36,7 @@ public:
        
 private:
        set<string> names;
-       string accnosfile, fastafile, namefile, groupfile, designfile, listfile, taxfile, outputDir, groups, sharedfile;
+       string accnosfile, fastafile, namefile, groupfile, countfile, designfile, listfile, taxfile, outputDir, groups, sharedfile;
        bool abort;
        vector<string> outputNames, Groups;
        GroupMap* groupMap;
@@ -49,6 +49,7 @@ private:
        int readShared();
        int readName();
        int readGroup();
+    int readCount();
        int readList();
        int readTax();
        int fillNames();
index e6e790d1687f27a7607555c176bd7b1ebcd44363..39801063110e6ca91a3f9029caf5c7b6eb8d6e91 100644 (file)
@@ -1083,8 +1083,12 @@ int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVe
                        for (int j = 0; j < listNames.size(); j++) {
                                int num = groupNamesSeqs.count(listNames[j]);
                                
-                               if (num == 0) { error = 1; m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
-                               else { groupNamesSeqs.erase(listNames[j]); }
+                               if (num == 0) { 
+                    error = 1; 
+                    if (groupfile != "") { 
+                        m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine();   } 
+                    else{ m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your count file. Please correct."); m->mothurOutEndLine();        }
+                }else { groupNamesSeqs.erase(listNames[j]); }
                        }
                }
                
index 151b254c7ca983abe1568a962aba5eef0e2f5ead..71d7782cad87c7914c2d14071ec1cc657b2803d1 100644 (file)
@@ -120,7 +120,7 @@ void SharedUtil::setGroups(vector<string>& userGroups, vector<string>& allGroups
                                
                                //if the user only entered invalid groups
                                if (userGroups.size() == 0) { 
-                                       m->mothurOut("You provided no valid groups. I will run the command using all the groups in your groupfile."); m->mothurOutEndLine();
+                                       m->mothurOut("You provided no valid groups. I will run the command using all the groups in your file."); m->mothurOutEndLine();
                                        for (int i = 0; i < allGroups.size(); i++) {
                                                userGroups.push_back(allGroups[i]);
                                        }