git.donarmstrong.com Git - mothur.git/blobdiff - readtreecommand.cpp
fixed cluster.classic and added weighted method to hcluster
[mothur.git] / readtreecommand.cpp
index 0a86f36dfdced49cb6151205e82c0372556e1209..edfdf3b77e0d85ff51afce83323d0204ab987a66 100644 (file)
@@ -9,6 +9,41 @@
 
 #include "readtreecommand.h"
 
+//**********************************************************************************************************************
+vector<string> ReadTreeCommand::getValidParameters(){  // Returns the parameter names listed below as a vector<string>.
+       try {
+               string Array[] =  {"tree","group","name","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // element count = total array bytes / bytes per string, so the range spans the whole array
+               return myArray;
+       }
+       catch(exception& e) { // any caught exception is reported via m->errorOut with this class/method name, then the process exits
+               m->errorOut(e, "ReadTreeCommand", "getValidParameters");
+               exit(1); // exits with status 1, so control never leaves this handler without a return value
+       }
+}
+//**********************************************************************************************************************
+vector<string> ReadTreeCommand::getRequiredParameters(){       // Returns a one-element vector<string> containing "tree".
+       try {
+               string Array[] =  {"tree"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // element count = total array bytes / bytes per string (1 here)
+               return myArray;
+       }
+       catch(exception& e) { // any caught exception is reported via m->errorOut with this class/method name, then the process exits
+               m->errorOut(e, "ReadTreeCommand", "getRequiredParameters");
+               exit(1); // exits with status 1, so control never leaves this handler without a return value
+       }
+}
+//**********************************************************************************************************************
+vector<string> ReadTreeCommand::getRequiredFiles(){    // Returns an empty vector<string>; no file names are listed here.
+       try {
+               vector<string> myArray; // default-constructed, so it holds no elements
+               return myArray;
+       }
+       catch(exception& e) { // any caught exception is reported via m->errorOut with this class/method name, then the process exits
+               m->errorOut(e, "ReadTreeCommand", "getRequiredFiles");
+               exit(1); // exits with status 1, so control never leaves this handler without a return value
+       }
+}
 //**********************************************************************************************************************
 ReadTreeCommand::ReadTreeCommand(string option)  {
        try {
@@ -76,8 +111,20 @@ ReadTreeCommand::ReadTreeCommand(string option)  {
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }  
-                       else if (groupfile == "not found") { groupfile = ""; m->mothurOut("group is a required parameter for the read.tree command."); m->mothurOutEndLine(); abort = true;     }
-                       else {  
+                       else if (groupfile == "not found") { 
+                               groupfile = ""; 
+                               
+                               m->mothurOut("You have not provided a group file. I am assumming all sequence are from the same group."); m->mothurOutEndLine();        
+                               
+                               if (treefile != "") {  Tree* tree = new Tree(treefile); delete tree;  } //extracts names from tree to make faked out groupmap
+                               
+                               globaldata->setGroupFile(groupfile); 
+                               //read in group map info.
+                               treeMap = new TreeMap();
+                               for (int i = 0; i < globaldata->Treenames.size(); i++) { treeMap->addSeq(globaldata->Treenames[i], "Group1"); }
+                               globaldata->gTreemap = treeMap;
+                                       
+                       }else {  
                                globaldata->setGroupFile(groupfile); 
                                //read in group map info.
                                treeMap = new TreeMap(groupfile);
@@ -110,7 +157,7 @@ void ReadTreeCommand::help(){
                m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n");
                m->mothurOut("The read.tree command parameters are tree, group and name.\n");
                m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n");
-               m->mothurOut("The tree and group parameters are both required.\n");
+               m->mothurOut("The tree and group parameters are both required, if no group file is given then one group is assumed.\n");
                m->mothurOut("The name parameter allows you to enter a namefile.\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n");
        }
@@ -154,46 +201,47 @@ int ReadTreeCommand::execute(){
                }
 
                
-//             Sarah, isn't this checking already done when assigning the sequences to the groups?  it makes read.tree
-//             wicked slow...  For some reason my tree is coming in here eventhough the number of sequences in the tree
-//             agrees with the number of lines in the name file and the number of sequences represented by the name file
-//             agrees with the number of sequences in the group file
-
-               //output any names that are in group file but not in tree
+               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+               int numNamesInTree;
+               if (namefile != "")  {  
+                       if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+                       else {   numNamesInTree = globaldata->Treenames.size();  }
+               }else {  numNamesInTree = globaldata->Treenames.size();  }
                
-//             if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
-//                     cout << "in here" << endl;
-//                     for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
-//                             //is that name in the tree?
-//                             int count = 0;
-//                             for (int j = 0; j < globaldata->Treenames.size(); j++) {
-//                                     if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
-//                                     count++;
-//                             }
-//                             
-//                             if (m->control_pressed) {  
-//                                     for (int i = 0; i < T.size(); i++) {  delete T[i];  }
-//                                     globaldata->gTree.clear();
-//                                     delete globaldata->gTreemap;
-//                                     return 0;
-//                             }
-//                             
-//                             //then you did not find it so report it 
-//                             if (count == globaldata->Treenames.size()) { 
-//                                     //if it is in your namefile then don't remove
-//                                     map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
-//                                     
-//                                     if (it == nameMap.end()) {
-//                                             m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-//                                             treeMap->removeSeq(treeMap->namesOfSeqs[i]);
-//                                             i--; //need this because removeSeq removes name from namesOfSeqs
-//                                     }
-//                             }
-//                     }
-//                     
-//                     globaldata->gTreemap = treeMap;
-//             }
                
+               //output any names that are in group file but not in tree
+               if (numNamesInTree < treeMap->getNumSeqs()) {
+                       for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
+                               //is that name in the tree?
+                               int count = 0;
+                               for (int j = 0; j < globaldata->Treenames.size(); j++) {
+                                       if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
+                                       count++;
+                               }
+                               
+                               if (m->control_pressed) {  
+                                       for (int i = 0; i < T.size(); i++) {  delete T[i];  }
+                                       globaldata->gTree.clear();
+                                       delete globaldata->gTreemap;
+                                       return 0;
+                               }
+                               
+                               //then you did not find it so report it 
+                               if (count == globaldata->Treenames.size()) { 
+                                       //if it is in your namefile then don't remove
+                                       map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
+                                       
+                                       if (it == nameMap.end()) {
+                                               m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                                               treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
+                                       }
+                               }
+                       }
+                       
+                       globaldata->gTreemap = treeMap;
+               }
+
                return 0;
        }
        catch(exception& e) {
@@ -205,6 +253,7 @@ int ReadTreeCommand::execute(){
 int ReadTreeCommand::readNamesFile() {
        try {
                globaldata->names.clear();
+               numUniquesInName = 0;
                
                ifstream in;
                m->openInputFile(namefile, in);
@@ -215,6 +264,8 @@ int ReadTreeCommand::readNamesFile() {
                while(!in.eof()) {
                        in >> first >> second; m->gobble(in);
                        
+                       numUniquesInName++;
+
                        itNames = globaldata->names.find(first);
                        if (itNames == globaldata->names.end()) {  
                                globaldata->names[first] = second; 
@@ -223,8 +274,8 @@ int ReadTreeCommand::readNamesFile() {
                                vector<string> dupNames;
                                m->splitAtComma(second, dupNames);
                                
-                               for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  }
-                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); return 1; }                        
+                               for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  if ((groupfile == "") && (i != 0)) { globaldata->gTreemap->addSeq(dupNames[i], "Group1"); }  }
+                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }                 
                }
                in.close();