git.donarmstrong.com Git - mothur.git/blobdiff - classifytreecommand.cpp
added phylip as output file type for commands that output distance matrices. added...
[mothur.git] / classifytreecommand.cpp
index e1ef6d304227f7b523ebb9540373a58cdd2339a0..7861a01bab3420f1598e351b30765dbd0ae5f124 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "classifytreecommand.h"
 #include "phylotree.h"
+#include "treereader.h"
 
 //**********************************************************************************************************************
 vector<string> ClassifyTreeCommand::setParameters(){   
@@ -33,7 +34,7 @@ vector<string> ClassifyTreeCommand::setParameters(){
 string ClassifyTreeCommand::getHelpString(){   
        try {
                string helpString = "";
-               helpString += "The classify.tree command reads a tree and taxonomy file and output the concensus taxonomy for each node on the tree. \n";
+               helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
                helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
                helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
                helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
@@ -48,7 +49,27 @@ string ClassifyTreeCommand::getHelpString(){
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+string ClassifyTreeCommand::getOutputFileNameTag(string type, string inputName=""){    // Returns the fixed file-name suffix for output type `type`; inputName is not read anywhere in this body.
+       try {
+        string outputFileName = "";   // stays empty on both error paths below
+               map<string, vector<string> >::iterator it;
+        
+        // Only types present as keys of outputTypes are accepted; any other type is reported and "" is returned.
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {   // type is a key of outputTypes: translate it to its suffix
+            if (type == "tree") {  outputFileName =  "taxonomy.tre"; }
+            else if (type == "summary") {  outputFileName =  "taxonomy.summary"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }   // key exists but no suffix is defined here; unlike the unknown-type path above, this one also sets m->control_pressed
+        }
+        return outputFileName;   // "taxonomy.tre", "taxonomy.summary", or "" after an error message
+       }
+       catch(exception& e) {   // hand the exception to m->errorOut with the class and method name, then exit with status 1
+               m->errorOut(e, "ClassifyTreeCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 ClassifyTreeCommand::ClassifyTreeCommand(){    
        try {
@@ -86,12 +107,6 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
-                       m->runParse = true;
-                       m->clearGroups();
-                       m->clearAllGroups();
-                       m->Treenames.clear();
-                       m->names.clear();
-                       
                        vector<string> tempOutNames;
                        outputTypes["tree"] = tempOutNames;
                        outputTypes["summary"] = tempOutNames;
@@ -195,73 +210,18 @@ int ClassifyTreeCommand::execute(){
                //    reading tree info                                                    //
                /***************************************************/
         m->setTreeFile(treefile);
-        if (groupfile != "") {
-                       //read in group map info.
-                       tmap = new TreeMap(groupfile);
-                       tmap->readMap();
-               }else{ //fake out by putting everyone in one group
-                       Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
-                       tmap = new TreeMap();
-                       
-                       for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
-               }
-               
-               if (namefile != "") { readNamesFile(); }
-               
-               read = new ReadNewickTree(treefile);
-               int readOk = read->read(tmap); 
-               
-               if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-               
-               read->AssembleTrees();
-               vector<Tree*> T = read->getTrees();
-        Tree* outputTree = T[0]; 
-               delete read;
-               
-               //make sure all files match
-               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
-               int numNamesInTree;
-               if (namefile != "")  {  
-                       if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
-                       else {   numNamesInTree = m->Treenames.size();  }
-               }else {  numNamesInTree = m->Treenames.size();  }
-               
-               
-               //output any names that are in group file but not in tree
-               if (numNamesInTree < tmap->getNumSeqs()) {
-                       for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
-                               //is that name in the tree?
-                               int count = 0;
-                               for (int j = 0; j < m->Treenames.size(); j++) {
-                                       if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
-                                       count++;
-                               }
-                               
-                               if (m->control_pressed) { 
-                                       delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
-                                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
-                                       m->clearGroups();
-                                       return 0;
-                               }
-                               
-                               //then you did not find it so report it 
-                               if (count == m->Treenames.size()) { 
-                                       //if it is in your namefile then don't remove
-                                       map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-                                       
-                                       if (it == nameMap.end()) {
-                                               m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-                                               tmap->removeSeq(tmap->namesOfSeqs[i]);
-                                               i--; //need this because removeSeq removes name from namesOfSeqs
-                                       }
-                               }
-                       }
-               }
+        
+        TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+        vector<Tree*> T = reader->getTrees();
+        TreeMap* tmap = T[0]->getTreeMap();
+        Tree* outputTree = T[0];
+        delete reader;
+
+        if (namefile != "") { m->readNames(namefile, nameMap, nameCount); }
                         
-        if (m->control_pressed) { delete outputTree; delete tmap;  return 0; }
+        if (m->control_pressed) { delete tmap;  delete outputTree;  return 0; }
                
-        readTaxonomyFile();
-        
+        m->readTax(taxonomyfile, taxMap);
         
         /***************************************************/
         //             get concensus taxonomies                    //
@@ -302,7 +262,7 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(treefile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary";
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("summary");
                outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
                
                ofstream out;
@@ -316,7 +276,7 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){
                
                string treeOutputDir = outputDir;
                if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
-               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre";
+               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
                
                //create a map from tree node index to names of descendants, save time later
                map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
@@ -484,6 +444,7 @@ map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i
                
                int lc = T->tree[i].getLChild();
                int rc = T->tree[i].getRChild();
+        TreeMap* tmap = T->getTreeMap();
                
                if (lc == -1) { //you are a leaf your only descendant is yourself
             string group = tmap->getGroup(T->tree[i].getName());
@@ -512,68 +473,6 @@ map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i
                exit(1);
        }
 }
-//**********************************************************************************************************************
-int ClassifyTreeCommand::readTaxonomyFile() {
-       try {
-               
-               ifstream in;
-               m->openInputFile(taxonomyfile, in);
-               
-               string name, tax;
-        
-               while(!in.eof()){
-                       in >> name >> tax;              
-                       m->gobble(in);
-                       
-                       //are there confidence scores, if so remove them
-                       if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
-                       
-                       taxMap[name] = tax;
-                       
-                       if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
-               }
-               in.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile");
-               exit(1);
-       }
-}
-
-/*****************************************************************/
-int ClassifyTreeCommand::readNamesFile() {
-       try {
-               ifstream inNames;
-               m->openInputFile(namefile, inNames);
-               
-               string name, names;
-        
-               while(!inNames.eof()){
-                       inNames >> name;                        //read from first column  A
-                       inNames >> names;               //read from second column  A,B,C,D
-                       m->gobble(inNames);
-                       
-                       //parse names into vector
-                       vector<string> theseNames;
-                       m->splitAtComma(names, theseNames);
-            
-                       for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
-            nameCount[name] = theseNames.size();
-                       
-                       if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
-               }
-               inNames.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyTreeCommand", "readNamesFile");
-               exit(1);
-       }
-}
-
 /*****************************************************************/