git.donarmstrong.com Git - mothur.git/blobdiff - readtreecommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / readtreecommand.cpp
index b030542ef8f7e5933f2bf32c4c6bf6ba9e051ee0..edfdf3b77e0d85ff51afce83323d0204ab987a66 100644 (file)
 #include "readtreecommand.h"
 
 //**********************************************************************************************************************
-ReadTreeCommand::ReadTreeCommand(string option){
+vector<string> ReadTreeCommand::getValidParameters(){  // Returns the parameter names this command accepts: tree, group, name, outputdir, inputdir.
+       try {
+               string Array[] =  {"tree","group","name","outputdir","inputdir"}; // same list the constructor validates user options against
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // sizeof(Array)/sizeof(string) is the element count, so the whole array is copied
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadTreeCommand", "getValidParameters"); // report the exception with class and function name
+               exit(1); // then terminate the process instead of propagating the error
+       }
+}
+//**********************************************************************************************************************
+vector<string> ReadTreeCommand::getRequiredParameters(){       // Returns the parameter names that must be supplied; only "tree" is listed.
+       try {
+               string Array[] =  {"tree"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // sizeof(Array)/sizeof(string) is the element count (1 here)
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadTreeCommand", "getRequiredParameters"); // report the exception with class and function name
+               exit(1); // then terminate the process instead of propagating the error
+       }
+}
+//**********************************************************************************************************************
+vector<string> ReadTreeCommand::getRequiredFiles(){    // Returns an empty list: this command names no required files.
+       try {
+               vector<string> myArray; // intentionally left empty
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadTreeCommand", "getRequiredFiles"); // report the exception with class and function name
+               exit(1); // then terminate the process instead of propagating the error
+       }
+}
+//**********************************************************************************************************************
+ReadTreeCommand::ReadTreeCommand(string option)  {
        try {
                globaldata = GlobalData::getInstance();
                abort = false;
-               
+                               
                //allow user to run help
                if(option == "help") { help(); abort = true; }
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"tree","group"};
+                       string Array[] =  {"tree","group","name","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
                        
                        ValidParameters validParameter;
+                       map<string, string>::iterator it;
                
                        //check to make sure all parameters are valid for command
-                       for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
                        globaldata->newRead();
                        
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("tree");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["tree"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+
+                       }
+
+                       
                        //check for required parameters
                        treefile = validParameter.validFile(parameters, "tree", true);
                        if (treefile == "not open") { abort = true; }
-                       else if (treefile == "not found") { treefile = ""; cout << "tree is a required parameter for the read.tree command." << endl; abort = true;  }  
-//                     else {  globaldata->setTreeFile(treefile);  globaldata->setFormat("tree");      }
+                       else if (treefile == "not found") { treefile = ""; m->mothurOut("tree is a required parameter for the read.tree command."); m->mothurOutEndLine(); abort = true;  }     
+                       else {  globaldata->setTreeFile(treefile);  globaldata->setFormat("tree");      }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }  
-                       else if (groupfile == "not found") { groupfile = ""; cout << "group is a required parameter for the read.tree command." << endl; abort = true;  }
-                       else {  
+                       else if (groupfile == "not found") { 
+                               groupfile = ""; 
+                               
+                               m->mothurOut("You have not provided a group file. I am assumming all sequence are from the same group."); m->mothurOutEndLine();        
+                               
+                               if (treefile != "") {  Tree* tree = new Tree(treefile); delete tree;  } //extracts names from tree to make faked out groupmap
+                               
+                               globaldata->setGroupFile(groupfile); 
+                               //read in group map info.
+                               treeMap = new TreeMap();
+                               for (int i = 0; i < globaldata->Treenames.size(); i++) { treeMap->addSeq(globaldata->Treenames[i], "Group1"); }
+                               globaldata->gTreemap = treeMap;
+                                       
+                       }else {  
                                globaldata->setGroupFile(groupfile); 
                                //read in group map info.
                                treeMap = new TreeMap(groupfile);
@@ -52,6 +132,11 @@ ReadTreeCommand::ReadTreeCommand(string option){
                                globaldata->gTreemap = treeMap;
                        }
                        
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }
+                       else if (namefile == "not found") { namefile = ""; }
+                       else { readNamesFile(); }       
+                       
                        if (abort == false) {
                                filename = treefile;
                                read = new ReadNewickTree(filename);
@@ -60,11 +145,7 @@ ReadTreeCommand::ReadTreeCommand(string option){
                }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTreeCommand class Function ReadTreeCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTreeCommand class function ReadTreeCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTreeCommand", "ReadTreeCommand");           
                exit(1);
        }
 }
@@ -72,26 +153,24 @@ ReadTreeCommand::ReadTreeCommand(string option){
 
 void ReadTreeCommand::help(){ // Prints the usage text for the read.tree command; takes no input and returns nothing.
        try {
-               cout << "The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. " << "\n";
-               cout << "It also must be run before using the parsimony command, unless you are using the randomtree parameter." << "\n";
-               cout << "The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile)." << "\n";
-               cout << "The tree and group parameters are both required." << "\n";
-               cout << "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile)." << "\n" << "\n";
+               m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n"); // output now goes through m->mothurOut rather than cout
+               m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n");
+               m->mothurOut("The read.tree command parameters are tree, group and name.\n");
+               m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n");
+               m->mothurOut("The tree and group parameters are both required, if no group file is given then one group is assumed.\n"); // NOTE(review): text calls group "required" yet also describes the no-group fallback; the constructor treats a missing group file as non-fatal - consider rewording
+               m->mothurOut("The name parameter allows you to enter a namefile.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n");
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTreeCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTreeCommand", "help");      // report the exception with class and function name
                exit(1); // terminate the process after reporting
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTreeCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
 }
 
 //**********************************************************************************************************************
 
 ReadTreeCommand::~ReadTreeCommand(){ // Releases the tree reader owned by this command.
-       delete read;
+       if (abort == false) { delete read; } // the constructor allocates read only inside its abort == false branch, so skip the delete otherwise
 }
 
 //**********************************************************************************************************************
@@ -105,17 +184,33 @@ int ReadTreeCommand::execute(){
                
                readOk = read->read(); 
                
-               if (readOk != 0) { cout << "Read Terminated." << endl; globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; }
+               if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); globaldata->gTree.clear(); delete globaldata->gTreemap; return 0; }
                
                vector<Tree*> T = globaldata->gTree;
-               
+
                //assemble users trees
                for (int i = 0; i < T.size(); i++) {
+                       if (m->control_pressed) {  
+                               for (int i = 0; i < T.size(); i++) {  delete T[i];  }
+                               globaldata->gTree.clear();
+                               delete globaldata->gTreemap;
+                               return 0;
+                       }
+       
                        T[i]->assembleTree();
                }
 
-               //output any names that are in names file but not in tree
-               if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
+               
+               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+               int numNamesInTree;
+               if (namefile != "")  {  
+                       if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+                       else {   numNamesInTree = globaldata->Treenames.size();  }
+               }else {  numNamesInTree = globaldata->Treenames.size();  }
+               
+               
+               //output any names that are in group file but not in tree
+               if (numNamesInTree < treeMap->getNumSeqs()) {
                        for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
                                //is that name in the tree?
                                int count = 0;
@@ -124,21 +219,70 @@ int ReadTreeCommand::execute(){
                                        count++;
                                }
                                
+                               if (m->control_pressed) {  
+                                       for (int i = 0; i < T.size(); i++) {  delete T[i];  }
+                                       globaldata->gTree.clear();
+                                       delete globaldata->gTreemap;
+                                       return 0;
+                               }
+                               
                                //then you did not find it so report it 
                                if (count == globaldata->Treenames.size()) { 
-                                       cout << treeMap->namesOfSeqs[i] << " is in your namefile and not in your tree. It will be disregarded." << endl;
+                                       //if it is in your namefile then don't remove
+                                       map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
+                                       
+                                       if (it == nameMap.end()) {
+                                               m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                                               treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
+                                       }
                                }
                        }
+                       
+                       globaldata->gTreemap = treeMap;
                }
-               
+
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTreeCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTreeCommand", "execute");   
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTreeCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+/*****************************************************************/
+int ReadTreeCommand::readNamesFile() { // Loads namefile into globaldata->names and nameMap. Returns 0 on success, 1 if a first-column name repeats (the names data is then discarded).
+       try {
+               globaldata->names.clear(); // start from an empty names map
+               numUniquesInName = 0; // counts records read from the file; execute() compares it with the number of tree names
+               
+               ifstream in;
+               m->openInputFile(namefile, in);
+               
+               string first, second; // first = representative name, second = comma-separated list of names (split below)
+               map<string, string>::iterator itNames;
+               
+               while(!in.eof()) { // NOTE(review): extraction below is not checked; if a read fails, first/second keep stale or empty values and are still processed - confirm the input can never hit that case
+                       in >> first >> second; m->gobble(in); // presumably gobble consumes trailing whitespace so eof is seen after the last record - TODO confirm
+                       
+                       numUniquesInName++; // incremented for every record, including one later rejected as a duplicate
+
+                       itNames = globaldata->names.find(first);
+                       if (itNames == globaldata->names.end()) {  
+                               globaldata->names[first] = second; 
+                               
+                               //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
+                               vector<string> dupNames;
+                               m->splitAtComma(second, dupNames);
+                               
+                               for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  if ((groupfile == "") && (i != 0)) { globaldata->gTreemap->addSeq(dupNames[i], "Group1"); }  } // each name maps to itself in nameMap; with no group file, every name after the first is added to the tree map under "Group1" (assumes globaldata->gTreemap is already set - the constructor sets it in its no-group branch before calling this)
+                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }                 // duplicate key: names map is cleared and namefile blanked; nameMap entries already inserted are left in place. NOTE(review): the constructor calls readNamesFile() without checking this return value
+               }
+               in.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ReadTreeCommand", "readNamesFile"); // report the exception with class and function name
+               exit(1); // then terminate the process instead of propagating the error
+       }
+}