]> git.donarmstrong.com Git - mothur.git/blobdiff - classifytreecommand.cpp
fixes while testing 1.33.0
[mothur.git] / classifytreecommand.cpp
index bcf27698ce2bf68bf3dc6a3909228a9d4ccd5cb2..79a82454d14304d997a2a46efbc62ffbeac63f60 100644 (file)
 //**********************************************************************************************************************
 vector<string> ClassifyTreeCommand::setParameters(){   
        try {
-               CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptree);
-        CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptaxonomy);
-        CommandParameter pname("name", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pname);
-        CommandParameter pgroup("group", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pgroup);
-        CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none","tree-summary",false,true,true); parameters.push_back(ptree);
+        CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none","",false,true,true); parameters.push_back(ptaxonomy);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+        CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -37,8 +38,9 @@ string ClassifyTreeCommand::getHelpString(){
                helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
                helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
                helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
+        helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
                helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
-               helpString += "The classify.tree command parameters are tree, group, name and taxonomy. The tree and taxonomy files are required.\n";
+               helpString += "The classify.tree command parameters are tree, group, name, count and taxonomy. The tree and taxonomy files are required.\n";
         helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
         helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n";
                helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; 
@@ -49,7 +51,22 @@ string ClassifyTreeCommand::getHelpString(){
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+string ClassifyTreeCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "summary") {  pattern = "[filename],taxonomy.summary"; } //makes file like: amazon.0.03.fasta
+        else if (type == "tree") {  pattern = "[filename],taxonomy.tre"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClassifyTreeCommand", "getOutputPattern");
+        exit(1);
+    }
+}
 //**********************************************************************************************************************
 ClassifyTreeCommand::ClassifyTreeCommand(){    
        try {
@@ -127,6 +144,14 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -158,16 +183,30 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
                        else if (groupfile == "not found") { groupfile = ""; }
                        else { m->setGroupFile(groupfile); }
             
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
             string temp = validParameter.validFile(parameters, "cutoff", false);                       if (temp == "not found") { temp = "51"; }
                        m->mothurConvert(temp, cutoff); 
                        
                        if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
             
-            if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                }
                        }
-                       
                }
        }
        catch(exception& e) {
@@ -193,15 +232,15 @@ int ClassifyTreeCommand::execute(){
         
         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
         vector<Tree*> T = reader->getTrees();
-        TreeMap* tmap = T[0]->getTreeMap();
+        CountTable* tmap = T[0]->getCountTable();
         Tree* outputTree = T[0];
         delete reader;
 
-        if (namefile != "") { readNamesFile(); }
+        if (namefile != "") { m->readNames(namefile, nameMap, nameCount); }
                         
         if (m->control_pressed) { delete tmap;  delete outputTree;  return 0; }
                
-        readTaxonomyFile();
+        m->readTax(taxonomyfile, taxMap);
         
         /***************************************************/
         //             get concensus taxonomies                    //
@@ -242,7 +281,9 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(treefile);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary";
+        map<string, string> variables; 
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(treefile));
+               string outputFileName = getOutputFileName("summary", variables);
                outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
                
                ofstream out;
@@ -256,12 +297,13 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){
                
                string treeOutputDir = outputDir;
                if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
-               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre";
+        variables["[filename]"] = treeOutputDir + m->getRootName(m->getSimpleName(treefile));
+               string outputTreeFileName = getOutputFileName("tree", variables);
                
                //create a map from tree node index to names of descendants, save time later
                map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
                for (int i = 0; i < T->getNumNodes(); i++) {
-                       if (m->control_pressed) { return 0; }
+            if (m->control_pressed) { return 0; }
                        
                        nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants);
                }
@@ -285,7 +327,7 @@ int ClassifyTreeCommand::getClassifications(Tree*& T){
                 tax = getTaxonomy(nodeToDescendants[i][group], size);
                 out << (i+1) << '\t' << size << '\t' << tax << endl;
             }
-                               
+               
                        T->tree[i].setLabel((i+1));
                }
                out.close();
@@ -314,7 +356,6 @@ string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
                
                for (set<string>::iterator it = names.begin(); it != names.end(); it++) {
             
-            
                        //if namesfile include the names
                        if (namefile != "") {
                 
@@ -335,7 +376,7 @@ string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
                                        }else{
                                                //add seq to tree
                         int num = nameCount[(*it)]; // we know its there since we found it in nameMap
-                                               for (int i = 0; i < num; i++) {  phylo->addSeqToTree((*it)+toString(i), it2->second);  }
+                                               for (int i = 0; i < num; i++) {  phylo->addSeqToTree((*it)+toString(i), itTax->second);  }
                         size += num;
                                        }
                                }
@@ -347,10 +388,15 @@ string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
                                if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
                                        m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
                                }else{
-                                       //add seq to tree
-                                       phylo->addSeqToTree((*it), itTax->second);
-                    size++;
-                               }
+                                       if (countfile != "") {
+                        int numDups = ct->getNumSeqs((*it)); 
+                        for (int j = 0; j < numDups; j++) {  phylo->addSeqToTree((*it), itTax->second);  }
+                        size += numDups;
+                    }else{
+                        //add seq to tree
+                        phylo->addSeqToTree((*it), itTax->second);
+                        size++;  
+                    }                          }
                        }
             
                        if (m->control_pressed) { delete phylo; return conTax; }
@@ -424,12 +470,12 @@ map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i
                
                int lc = T->tree[i].getLChild();
                int rc = T->tree[i].getRChild();
-        TreeMap* tmap = T->getTreeMap();
+       // TreeMap* tmap = T->getTreeMap();
                
                if (lc == -1) { //you are a leaf your only descendant is yourself
-            string group = tmap->getGroup(T->tree[i].getName());
+            vector<string> groups = T->tree[i].getGroup();
             set<string> mynames; mynames.insert(T->tree[i].getName());
-            names[group] = mynames; //mygroup -> me
+            for (int j = 0; j < groups.size(); j++) { names[groups[j]] = mynames;   } //mygroup -> me
             names["AllGroups"] = mynames;
                }else{ //your descedants are the combination of your childrens descendants
                        names = descendants[lc];
@@ -453,68 +499,6 @@ map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i
                exit(1);
        }
 }
-//**********************************************************************************************************************
-int ClassifyTreeCommand::readTaxonomyFile() {
-       try {
-               
-               ifstream in;
-               m->openInputFile(taxonomyfile, in);
-               
-               string name, tax;
-        
-               while(!in.eof()){
-                       in >> name >> tax;              
-                       m->gobble(in);
-                       
-                       //are there confidence scores, if so remove them
-                       if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
-                       
-                       taxMap[name] = tax;
-                       
-                       if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
-               }
-               in.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile");
-               exit(1);
-       }
-}
-
-/*****************************************************************/
-int ClassifyTreeCommand::readNamesFile() {
-       try {
-               ifstream inNames;
-               m->openInputFile(namefile, inNames);
-               
-               string name, names;
-        
-               while(!inNames.eof()){
-                       inNames >> name;                        //read from first column  A
-                       inNames >> names;               //read from second column  A,B,C,D
-                       m->gobble(inNames);
-                       
-                       //parse names into vector
-                       vector<string> theseNames;
-                       m->splitAtComma(names, theseNames);
-            
-                       for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
-            nameCount[name] = theseNames.size();
-                       
-                       if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
-               }
-               inNames.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyTreeCommand", "readNamesFile");
-               exit(1);
-       }
-}
-
 /*****************************************************************/