git.donarmstrong.com Git - mothur.git/blobdiff - splitabundcommand.cpp
changes while setting up test files
[mothur.git] / splitabundcommand.cpp
index 17809631f9e9cc590e510a33ec5528e493a14d3c..9c514a9371ae37a24114c3eacf59424733040cfc 100644 (file)
@@ -9,19 +9,82 @@
 
 #include "splitabundcommand.h"
 
+//**********************************************************************************************************************
+vector<string> SplitAbundCommand::setParameters(){     // Appends this command's ten CommandParameter entries to `parameters` and returns the parameter names.
+       try {           
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
+               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+               CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+               CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               // NOTE(review): `parameters` is not cleared in this function, so a second call would append the same entries again - confirm callers invoke it once per object.
+               vector<string> myArray; // collects the `name` field of every element of `parameters`, in container order
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitAbundCommand", "setParameters"); // report the exception together with the class and method name
+               exit(1); // the process is terminated once the error has been reported
+       }
+}
+//**********************************************************************************************************************
+string SplitAbundCommand::getHelpString(){     
+       try {
+               string helpString = "";
+               helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
+               helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
+               helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
+               helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
+               helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
+               helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
+               helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
+               helpString += "The groups parameter allows you to parse the files into rare and abundant files by group.  \n";
+               helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n";
+               helpString += "If you want .abund and .rare files for all groups, set groups=all.  \n";
+               helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
+               helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitAbundCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+SplitAbundCommand::SplitAbundCommand(){        // Default constructor: marks the command as aborted/help-only and registers its parameters and output types.
+       try {
+               abort = true; calledHelp = true; // with both flags set, execute() in this file returns 0 before doing any work
+               setParameters(); // return value (the list of parameter names) is discarded here
+               vector<string> tempOutNames; // empty list used to create each outputTypes entry below
+               outputTypes["list"] = tempOutNames;
+               outputTypes["name"] = tempOutNames;
+               outputTypes["accnos"] = tempOutNames;
+               outputTypes["group"] = tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand"); // report the exception together with the class and method name
+               exit(1); // the process is terminated once the error has been reported
+       }
+}
 //**********************************************************************************************************************
 SplitAbundCommand::SplitAbundCommand(string option)  {
        try {
-               abort = false;
+               abort = false; calledHelp = false;   
                allLines = 1;
                        
                //allow user to run help
-               if(option == "help") { help(); abort = true; }
+               if(option == "help") { help(); abort = true; calledHelp = true; }
                
                else {
-                       //valid paramters for this command
-                       string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
@@ -33,7 +96,15 @@ SplitAbundCommand::SplitAbundCommand(string option)  {
                        for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
-                                               
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       outputTypes["name"] = tempOutNames;
+                       outputTypes["accnos"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;                    
+                                                                                               
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -90,7 +161,11 @@ SplitAbundCommand::SplitAbundCommand(string option)  {
                
                        fastafile = validParameter.validFile(parameters, "fasta", true);
                        if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
+                       else if (fastafile == "not found") {                            
+                               fastafile = m->getFastaFile(); 
+                               if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }       
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") {  groupfile = ""; abort = true; } 
@@ -115,7 +190,15 @@ SplitAbundCommand::SplitAbundCommand(string option)  {
                        if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
                        
                        //do you have all files needed
-                       if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
+                       if ((listfile == "") && (namefile == "")) { 
+                               namefile = m->getNameFile(); 
+                               if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
+                               else {                          
+                                       listfile = m->getListFile(); 
+                                       if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                                       else {  m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
+                               }
+                       }
                        
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
@@ -143,29 +226,6 @@ SplitAbundCommand::SplitAbundCommand(string option)  {
        }
 }
 //**********************************************************************************************************************
-void SplitAbundCommand::help(){
-       try {
-               m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
-               m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
-               m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
-               m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
-               m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
-               m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
-               m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
-               m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
-               m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
-               m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
-               m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
-               m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
-
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SplitAbundCommand", "help");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
 SplitAbundCommand::~SplitAbundCommand(){ // Destructor: deletes groupMap, but only when groupfile is a non-empty string.
        if (groupfile != "") {  delete groupMap;  } // NOTE(review): presumably groupMap is allocated exactly when groupfile is set - confirm in the constructor
 }
@@ -173,28 +233,10 @@ SplitAbundCommand::~SplitAbundCommand(){
 int SplitAbundCommand::execute(){
        try {
        
-               if (abort == true) {    return 0;       }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (listfile != "") { //you are using a listfile to determine abundance
                        if (outputDir == "") { outputDir = m->hasPath(listfile); }
-               
-                       //remove old files so you can append later....
-                       string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
-                       if (Groups.size() == 0) {
-                               remove((fileroot + "rare.list").c_str());
-                               remove((fileroot + "abund.list").c_str());
-                               
-                               outputNames.push_back((fileroot + "rare.list"));
-                               outputNames.push_back((fileroot + "abund.list"));
-                       }else{
-                               for (int i=0; i<Groups.size(); i++) {
-                                       remove((fileroot + Groups[i] + ".rare.list").c_str());
-                                       remove((fileroot + Groups[i] + ".abund.list").c_str());
-                                       
-                                       outputNames.push_back((fileroot + Groups[i] + ".rare.list"));
-                                       outputNames.push_back((fileroot + Groups[i] + ".abund.list"));
-                               }
-                       }
                        
                        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                        set<string> processedLabels;
@@ -290,7 +332,34 @@ int SplitAbundCommand::execute(){
                        if (accnos)                                                     {  writeAccnos(tag);    }
                        if (fastafile != "")                            {  parseFasta(tag);             }
                }
-
+               
+               //set fasta file as new current fastafile
+               string current = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+               
+               itTypes = outputTypes.find("name");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+               }
+               
+               itTypes = outputTypes.find("group");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+               }
+               
+               itTypes = outputTypes.find("list");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
+               }
+               
+               itTypes = outputTypes.find("accnos");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+               }
+               
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
@@ -326,9 +395,11 @@ int SplitAbundCommand::splitList(ListVector* thisList) {
                        }
                }//end for
 
-               writeList(thisList);
                
                string tag = thisList->getLabel() + ".";
+               
+               writeList(thisList, tag);
+               
                if (groupfile != "")                            {  parseGroup(tag);             }
                if (accnos)                                                     {  writeAccnos(tag);    }
                if (fastafile != "")                            {  parseFasta(tag);             }
@@ -342,7 +413,7 @@ int SplitAbundCommand::splitList(ListVector* thisList) {
        }
 }
 /**********************************************************************************************************************/
-int SplitAbundCommand::writeList(ListVector* thisList) { 
+int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
        try {
                
                map<string, ofstream*> filehandles;
@@ -364,13 +435,13 @@ int SplitAbundCommand::writeList(ListVector* thisList) {
                        ofstream aout;
                        ofstream rout;
                        
-                       string rare = outputDir + m->getRootName(m->getSimpleName(listfile))  + "rare.list";
-                       m->openOutputFileAppend(rare, rout);
-                       //outputNames.push_back(rare);
+                       string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
+                       m->openOutputFile(rare, rout);
+                       outputNames.push_back(rare); outputTypes["list"].push_back(rare);
                        
-                       string abund = outputDir + m->getRootName(m->getSimpleName(listfile))  + "abund.list";
-                       m->openOutputFileAppend(abund, aout);
-                       //outputNames.push_back(abund);
+                       string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
+                       m->openOutputFile(abund, aout);
+                       outputNames.push_back(abund); outputTypes["list"].push_back(abund);
 
                        if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
                        if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
@@ -406,8 +477,10 @@ int SplitAbundCommand::writeList(ListVector* thisList) {
                                temp2 = new ofstream;
                                filehandles[Groups[i]+".abund"] = temp2;
                                
-                               m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
-                               m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
+                               m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
+                               m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
+                               outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
+                               outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
                        }
                        
                        map<string, string> groupVector;
@@ -572,11 +645,11 @@ int SplitAbundCommand::writeNames() { //namefile
                        
                        string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
                        m->openOutputFile(rare, rout);
-                       outputNames.push_back(rare);
+                       outputNames.push_back(rare); outputTypes["name"].push_back(rare);
                        
                        string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
                        m->openOutputFile(abund, aout);
-                       outputNames.push_back(abund);
+                       outputNames.push_back(abund); outputTypes["name"].push_back(abund);
                        
                        if (rareNames.size() != 0) {
                                for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
@@ -642,7 +715,7 @@ int SplitAbundCommand::writeNames() { //namefile
                        
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
                                (*(filehandles[it3->first])).close();
-                               outputNames.push_back(fileroot + it3->first + ".names");  
+                               outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
                                delete it3->second;
                        }
                }
@@ -669,7 +742,7 @@ int SplitAbundCommand::writeAccnos(string tag) {
                        
                        string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
                        m->openOutputFile(rare, rout);
-                       outputNames.push_back(rare);
+                       outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
                        
                        for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
                                rout << (*itRare) << endl;
@@ -678,7 +751,7 @@ int SplitAbundCommand::writeAccnos(string tag) {
                
                        string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
                        m->openOutputFile(abund, aout);
-                       outputNames.push_back(abund);
+                       outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
                        
                        for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
                                aout << (*itAbund) << endl;
@@ -723,7 +796,7 @@ int SplitAbundCommand::writeAccnos(string tag) {
                        //close files
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
                                (*(filehandles[it3->first])).close();
-                               outputNames.push_back(fileroot + tag + it3->first + ".accnos");  
+                               outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
                                delete it3->second;
                        }
                }
@@ -748,11 +821,11 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile
                        
                        string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
                        m->openOutputFile(rare, rout);
-                       outputNames.push_back(rare);
+                       outputNames.push_back(rare); outputTypes["group"].push_back(rare);
                
                        string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
                        m->openOutputFile(abund, aout);
-                       outputNames.push_back(abund);
+                       outputNames.push_back(abund); outputTypes["group"].push_back(abund);
                        
                        for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
                                vector<string> names;
@@ -817,7 +890,7 @@ int SplitAbundCommand::parseGroup(string tag) { //namefile
                        
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
                                (*(filehandles[it3->first])).close();
-                               outputNames.push_back(fileroot + tag + it3->first + ".groups");  
+                               outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
                                delete it3->second;
                        }
                }
@@ -842,11 +915,11 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile
                        
                        string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
                        m->openOutputFile(rare, rout);
-                       outputNames.push_back(rare);
+                       outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
                
                        string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
                        m->openOutputFile(abund, aout);
-                       outputNames.push_back(abund);
+                       outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
                
                        //open input file
                        ifstream in;
@@ -937,7 +1010,7 @@ int SplitAbundCommand::parseFasta(string tag) { //namefile
                        
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
                                (*(filehandles[it3->first])).close();
-                               outputNames.push_back(fileroot + tag + it3->first + ".fasta");  
+                               outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
                                delete it3->second;
                        }
                }