git.donarmstrong.com Git - mothur.git/blobdiff - removerarecommand.cpp
added end of file character to some type reads. fixes while testing.
[mothur.git] / removerarecommand.cpp
index e46aa09f9e338ddd49305758504288f7dc65aa5e..c0694480bcebf7a3738c17ef812720577dc99c7d 100644 (file)
 //**********************************************************************************************************************
 vector<string> RemoveRareCommand::setParameters(){     
        try {
-               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
-               CommandParameter prabund("rabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(prabund);
-               CommandParameter psabund("sabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psabund);
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pnseqs("nseqs", "Number", "", "0", "", "", "",false,true); parameters.push_back(pnseqs);
-               CommandParameter pbygroup("bygroup", "Boolean", "", "f", "", "", "",false,true); parameters.push_back(pbygroup);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "atleast", "none","list",false,false,true); parameters.push_back(plist);
+               CommandParameter prabund("rabund", "InputTypes", "", "", "none", "atleast", "none","rabund",false,false,true); parameters.push_back(prabund);
+               CommandParameter psabund("sabund", "InputTypes", "", "", "none", "atleast", "none","sabund",false,false,true); parameters.push_back(psabund);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "atleast", "none","shared",false,false,true); parameters.push_back(pshared);
+        CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "none", "none","count",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pnseqs("nseqs", "Number", "", "0", "", "", "","",false,true,true); parameters.push_back(pnseqs);
+               CommandParameter pbygroup("bygroup", "Boolean", "", "f", "", "", "","",false,false); parameters.push_back(pbygroup);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -41,7 +42,7 @@ vector<string> RemoveRareCommand::setParameters(){
 string RemoveRareCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The remove.rare command parameters are list, rabund, sabund, shared, group, label, groups, bygroup and nseqs.\n";
+               helpString += "The remove.rare command parameters are list, rabund, sabund, shared, group, count, label, groups, bygroup and nseqs.\n";
                helpString += "The remove.rare command reads one of the following file types: list, rabund, sabund or shared file. It outputs a new file after removing the rare otus.\n";
                helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.  Default=all. You may separate group names with dashes.\n";
                helpString += "The label parameter is used to analyze specific labels in your input. default=all. You may separate label names with dashes.\n";
@@ -59,6 +60,26 @@ string RemoveRareCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string RemoveRareCommand::getOutputPattern(string type) { // Returns the output-filename pattern string for the given output type name.
+    try {
+        string pattern = ""; // stays empty (and is returned empty) if no branch below matches
+        // rabund, sabund, group, count and list all share one pattern; only "shared" adds a "[tag]" placeholder.
+        if (type == "rabund")            {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "sabund")    {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "shared")      {   pattern = "[filename],[tag],pick,[extension]";    }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  } // unknown type: log the error and set the control_pressed flag
+        
+        return pattern;
+    }
+    catch(exception& e) { // any exception is reported via m->errorOut with this class/method name, then the process exits
+        m->errorOut(e, "RemoveRareCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
 RemoveRareCommand::RemoveRareCommand(){        
        try {
                abort = true; calledHelp = true; 
@@ -68,6 +89,7 @@ RemoveRareCommand::RemoveRareCommand(){
                outputTypes["sabund"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
                outputTypes["shared"] = tempOutNames;
        }
        catch(exception& e) {
@@ -83,6 +105,7 @@ RemoveRareCommand::RemoveRareCommand(string option)  {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
@@ -105,6 +128,7 @@ RemoveRareCommand::RemoveRareCommand(string option)  {
                        outputTypes["list"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
                        outputTypes["shared"] = tempOutNames;   
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -153,29 +177,51 @@ RemoveRareCommand::RemoveRareCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        
                        //check for file parameters
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not open") { abort = true; }
-                       else if (listfile == "not found") {  listfile = "";  }  
+                       else if (listfile == "not found") {  listfile = "";  }
+                       else { m->setListFile(listfile); }
                        
                        sabundfile = validParameter.validFile(parameters, "sabund", true);
                        if (sabundfile == "not open") { abort = true; }
                        else if (sabundfile == "not found") {  sabundfile = "";  }      
+                       else { m->setSabundFile(sabundfile); }
                        
                        rabundfile = validParameter.validFile(parameters, "rabund", true);
                        if (rabundfile == "not open") { abort = true; }
                        else if (rabundfile == "not found") {  rabundfile = "";  }                              
+                       else { m->setRabundFile(rabundfile); }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { groupfile = ""; abort = true; }
                        else if (groupfile == "not found") {  groupfile = "";  }        
+                       else { m->setGroupFile(groupfile); }
                        
                        sharedfile = validParameter.validFile(parameters, "shared", true);
                        if (sharedfile == "not open") { sharedfile = "";  abort = true; }
                        else if (sharedfile == "not found") {  sharedfile = "";  }
+                       else { m->setSharedFile(sharedfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+                               
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
                        
                        if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
                                //is there are current file available for any of these?
@@ -214,14 +260,14 @@ RemoveRareCommand::RemoveRareCommand(string option)  {
                        
                        string temp = validParameter.validFile(parameters, "nseqs", false);      
                        if (temp == "not found") { m->mothurOut("nseqs is a required parameter."); m->mothurOutEndLine(); abort = true; }
-                       else { convert(temp, nseqs); }
+                       else { m->mothurConvert(temp, nseqs); }
                        
                        temp = validParameter.validFile(parameters, "bygroup", false);   if (temp == "not found") { temp = "f"; }
                        byGroup = m->isTrue(temp);
                        
                        if (byGroup && (sharedfile == "")) { m->mothurOut("The byGroup parameter is only valid with a shared file."); m->mothurOutEndLine(); }
                        
-                       if ((groupfile != "") && (listfile == "")) { m->mothurOut("A groupfile is only valid with a list file."); m->mothurOutEndLine(); groupfile = ""; }
+                       if (((groupfile != "") || (countfile != "")) && (listfile == "")) { m->mothurOut("A group or count file is only valid with a list file."); m->mothurOutEndLine(); groupfile = ""; countfile = ""; }
                }
                
        }
@@ -245,7 +291,7 @@ int RemoveRareCommand::execute(){
                if (listfile != "")                     {               processList();          }
                if (sharedfile != "")           {               processShared();        }
                
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
                        
                if (outputNames.size() != 0) {
                        m->mothurOutEndLine();
@@ -279,6 +325,11 @@ int RemoveRareCommand::execute(){
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
                        }
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -295,9 +346,17 @@ int RemoveRareCommand::processList(){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
-               string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" +  m->getExtension(groupfile);
-               
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[extension]"] = m->getExtension(listfile);
+               string outputFileName = getOutputFileName("list", variables);
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+        variables["[extension]"] = m->getExtension(groupfile);
+               string outputGroupFileName = getOutputFileName("group", variables);
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+        variables["[extension]"] = m->getExtension(countfile);
+        string outputCountFileName = getOutputFileName("count", variables);
+        
                ofstream out, outGroup;
                m->openOutputFile(outputFileName, out);
                
@@ -344,12 +403,21 @@ int RemoveRareCommand::processList(){
                
                //if groupfile is given then use it
                GroupMap* groupMap;
+        CountTable ct;
                if (groupfile != "") { 
                        groupMap = new GroupMap(groupfile); groupMap->readMap(); 
                        SharedUtil util;
-                       util.setGroups(Groups, groupMap->namesOfGroups);
+                       vector<string> namesGroups = groupMap->getNamesOfGroups();
+                       util.setGroups(Groups, namesGroups);
                        m->openOutputFile(outputGroupFileName, outGroup);
-               }
+               }else if (countfile != "") {
+            ct.readTable(countfile);
+            if (ct.hasGroupInfo()) {
+                vector<string> namesGroups = ct.getNamesOfGroups();
+                SharedUtil util;
+                util.setGroups(Groups, namesGroups);
+            }
+        }
                
                
                if (list != NULL) {     
@@ -359,13 +427,14 @@ int RemoveRareCommand::processList(){
                        
                        //for each bin
                        for (int i = 0; i < list->getNumBins(); i++) {
-                               if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); remove(outputGroupFileName.c_str()); } out.close();  remove(outputFileName.c_str());  return 0; }
+                               if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
                                
                                //parse out names that are in accnos file
                                string binnames = list->get(i);
                                vector<string> names;
                                string saveBinNames = binnames;
                                m->splitAtComma(binnames, names);
+                int binsize = names.size();
                                
                                vector<string> newGroupFile;
                                if (groupfile != "") {
@@ -381,14 +450,38 @@ int RemoveRareCommand::processList(){
                                                        saveBinNames += names[k] + ",";
                                                }
                                        }
-                                       names = newNames;
+                                       names = newNames; binsize = names.size();
                                        saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
-                               }
+                               }else if (countfile != "") {
+                                       saveBinNames = "";
+                    binsize = 0;
+                                       for(int k = 0; k < names.size(); k++) {
+                        if (ct.hasGroupInfo()) {
+                            vector<string> thisSeqsGroups = ct.getGroups(names[k]);
+                            
+                            int thisSeqsCount = 0;
+                            for (int n = 0; n < thisSeqsGroups.size(); n++) {
+                                if (m->inUsersGroups(thisSeqsGroups[n], Groups)) {
+                                    thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]);
+                                }
+                            }
+                            binsize += thisSeqsCount;
+                            //if you don't have any seqs from the groups the user wants, then remove you.
+                            if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); }
+                            else { saveBinNames += names[k] + ","; }
+                        }else {
+                            binsize += ct.getNumSeqs(names[k]); 
+                            saveBinNames += names[k] + ",";
+                        }
+                                       }
+                                       saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
+                }
 
-                               if (names.size() > nseqs) { //keep bin
+                               if (binsize > nseqs) { //keep bin
                                        newList.push_back(saveBinNames);
-                                       for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }
-                               }
+                                       if (groupfile != "") {  for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }  }
+                    else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) {  ct.remove(newGroupFile[k]); } }  
+                               }else {  if (countfile != "") {  for(int k = 0; k < names.size(); k++) {  ct.remove(names[k]); } }  }
                        }
                        
                        //print new listvector
@@ -400,6 +493,17 @@ int RemoveRareCommand::processList(){
                
                out.close();
                if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
+        if (countfile != "") { 
+            if (ct.hasGroupInfo()) {
+                vector<string> allGroups = ct.getNamesOfGroups();
+                for (int i = 0; i < allGroups.size(); i++) {
+                    if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); }
+                }
+
+            }
+            ct.printTable(outputCountFileName);
+            outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); 
+        }
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
                outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
@@ -416,7 +520,10 @@ int RemoveRareCommand::processSabund(){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(sabundfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "pick" +  m->getExtension(sabundfile);
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile));
+        variables["[extension]"] = m->getExtension(sabundfile);
+               string outputFileName = getOutputFileName("sabund", variables);
                outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
 
                ofstream out;
@@ -515,7 +622,10 @@ int RemoveRareCommand::processRabund(){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(rabundfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "pick" +  m->getExtension(rabundfile);
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile));
+        variables["[extension]"] = m->getExtension(rabundfile);
+               string outputFileName = getOutputFileName("rabund", variables);
                outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                ofstream out;
@@ -618,11 +728,14 @@ int RemoveRareCommand::processRabund(){
 //**********************************************************************************************************************
 int RemoveRareCommand::processShared(){
        try {
-               m->Groups = Groups;
+               m->setGroups(Groups);
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "pick" +  m->getExtension(sharedfile);
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[extension]"] = m->getExtension(sharedfile);
+               string outputFileName = getOutputFileName("shared", variables);
                outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                ofstream out;
@@ -645,6 +758,7 @@ int RemoveRareCommand::processShared(){
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
                                
+                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
                                processLookup(lookup, out);
                        }
                        
@@ -658,6 +772,7 @@ int RemoveRareCommand::processShared(){
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
                                
+                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
                                processLookup(lookup, out);                     
                                
                                //restore real lastlabel to save below
@@ -692,6 +807,7 @@ int RemoveRareCommand::processShared(){
                        
                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                        
+                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
                        processLookup(lookup, out);     
                        
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }