git.donarmstrong.com Git - mothur.git/commitdiff
fixed bug in read.tree updates help and validparameters strings, added ability for...
author westcott <westcott>
Wed, 8 Apr 2009 15:23:31 +0000 (15:23 +0000)
committer westcott <westcott>
Wed, 8 Apr 2009 15:23:31 +0000 (15:23 +0000)
13 files changed:
globaldata.cpp
globaldata.hpp
helpcommand.cpp
parsimony.cpp
parsimonycommand.cpp
readtree.cpp
readtreecommand.cpp
tree.cpp
treemap.cpp
unifracunweightedcommand.cpp
unifracweightedcommand.cpp
unweighted.cpp
validparameter.cpp

index da81879f48fff6fbe87ac67ba950afd84c03a69e..67c49bd28c942d5c432538b6f965406c0b592ae6 100644 (file)
@@ -30,6 +30,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        clear();
                        gGroupmap = NULL;
                        gTree.clear();
+                       Treenames.clear();
                        labels.clear(); lines.clear(); groups.clear();
                        allLines = 1;
                }
@@ -325,3 +326,144 @@ GlobalData::~GlobalData() {
        if(gorder != NULL)                      {       delete gorder;          }
 }
 /*******************************************************/
+
+/*******************************************************/
+/*
+ * Fills Treenames with the node names found in the tree file named by treefile.
+ *
+ * Newick file (first character is not '#'): skips leading text and [comments] up to the
+ * first '(' and hands the stream to readTreeString.  Only the first tree is read; all
+ * trees in the file are assumed to use the same names.
+ *
+ * Nexus file (first character is '#'): scans whitespace-separated tokens.  If a translate
+ * block is found, the first token of each "number name" pair is stored instead of parsing
+ * a tree.  If a "tree" token is found first, the tree that follows it is parsed.
+ *
+ * Reaching end of file at any point stops the scan; it never loops waiting for a ';'.
+ * Any exception is reported on stdout and the program exits with status 1.
+ */
+void GlobalData::parseTreeFile() {
+       //only takes names from the first tree and assumes that all trees use the same names.
+       try {
+               string filename = treefile;
+               ifstream filehandle;
+               openInputFile(filename, filehandle);
+               int c;
+               int comment = 0;        //1 while inside a [ ] comment
+
+               //if you are not a nexus file
+               if ((c = filehandle.peek()) != '#') {
+                       // get past comments and anything else in front of the first tree
+                       while (((c = filehandle.peek()) != ';') && (c != EOF)) {
+                               if(c == '[') {
+                                       comment = 1;
+                               }
+                               if(c == ']'){
+                                       comment = 0;
+                               }
+                               if((c == '(') && (comment != 1)){ break; }
+                               filehandle.get();
+                       }
+
+                       //read the first tree only; later trees would just repeat the same names
+                       if (c == '(') { readTreeString(filehandle); }
+               //if you are a nexus file
+               }else {
+                       string holder = "";
+
+                       // get past comments
+                       while(holder != "translate" && holder != "Translate"){
+                               if(holder == "[" || holder == "[!"){
+                                       comment = 1;
+                               }
+                               if(holder == "]"){
+                                       comment = 0;
+                               }
+
+                               //end of file: there is neither a translate block nor a tree
+                               if (!(filehandle >> holder)) { break; }
+
+                               //if there is no translate then you must read tree string otherwise use translate to get names
+                               if(holder == "tree" && comment != 1){
+                                       //pass over the "tree rep.6878900 = "
+                                       while (((c = filehandle.get()) != '(') && (c != EOF)) {;}
+
+                                       if (c == EOF) { break; }
+                                       filehandle.putback(c);  //put back first ( of tree.
+                                       readTreeString(filehandle);
+                                       break;
+                               }
+                       }
+
+                       //use nexus translation rather than parsing tree to save time
+                       if ((holder == "translate") || (holder == "Translate")) {
+                               string number, name, h;
+                               h = ""; // so it enters the loop the first time
+                               while (h != ";") {
+                                       //a ';' standing alone (e.g. on the line after the last name) ends the block
+                                       if (!(filehandle >> number) || (number == ";")) { break; }
+                                       if (!(filehandle >> name)) { break; }
+
+                                       //h = , until done with translation then h = ;
+                                       h = name.substr(name.length()-1);
+                                       Treenames.push_back(number);
+                               }
+                       }
+               }
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the GlobalData class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/*******************************************************/
+
+/*******************************************************/
+/*
+ * Reads one newick tree from filehandle and appends every label found in it to Treenames.
+ *
+ * A label is any run of characters that is not one of ( ) , : ; or whitespace (newline,
+ * tab, space).  Text following a ':' (a branch length) is skipped, not stored.
+ * Reading stops at the ';' that ends the tree, or at end of file if the tree is truncated.
+ *
+ * Any exception is reported on stdout and the program exits with status 1.
+ */
+void GlobalData::readTreeString(ifstream& filehandle)  {
+       try {
+               int c;
+               string name;
+
+               while(((c = filehandle.peek()) != ';') && (c != EOF)) {
+                       //if you are a name
+                       if ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
+                               name = "";
+                               c = filehandle.get();
+                               //';' and EOF end a name too, so a label right before the final ';' cannot run off the end of the file
+                               while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != ';') && (c != EOF) && (c != '\n') && (c != 32) && (c != '\t')) {
+                                       name += static_cast<char>(c);
+                                       c = filehandle.get();
+                               }
+
+                               Treenames.push_back(name);
+                               if (c == EOF) { break; }        //truncated tree, nothing to put back
+                               filehandle.putback(c);
+                       }
+
+                       if (c  == ':') { //read until you reach the end of the branch length
+                               while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != EOF) && (c != '\n') && (c != '\t') && (c != 32)) {
+                                       c = filehandle.get();
+                               }
+                               if (c == EOF) { break; }        //truncated tree, nothing to put back
+                               filehandle.putback(c);
+                       }
+
+                       //consume the delimiter that ended the name / branch length (or the current structural character)
+                       c = filehandle.get();
+                       if ((c == ';') || (c == EOF)) { break; }
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the GlobalData class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+
+/*******************************************************/
+
+/*******************************************************/
+
+
index f74619e29c792d28486447c919281f88b5d03ba0..0abdd28cd18bcfc617ab105069cf15db5ead241b 100644 (file)
@@ -37,6 +37,7 @@ public:
        vector<string>  Estimators, Groups; //holds estimators to be used
        set<int> lines; //hold lines to be used
        set<string> labels; //holds labels to be used
+       vector<string> Treenames;
        
        string getPhylipFile();
        string getColumnFile();
@@ -80,6 +81,12 @@ public:
        void clearAbund();
        
        void parseGlobalData(string, string);
+       
+       void parseTreeFile();           //parses through tree file to find names of nodes and number of them
+                                                       //this is required in case user has sequences in the names file that are
+                                                       //not included in the tree. 
+                                                       //only takes names from the first tree in the tree file and assumes that all trees use the same names.
+
                
 private:
        string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups;
@@ -91,6 +98,7 @@ private:
        GlobalData();
        ~GlobalData();
        void reset();   //clears all non filename parameters
+       void readTreeString(ifstream&);
        
        
        
index 8e28cdeb1b1c41c9a9b353f3d8b94439ef3c5c86..05aaff0b0d58a46b380223b5b52ed84812e38c57 100644 (file)
@@ -72,21 +72,23 @@ int HelpCommand::execute(){
        }else if (globaldata->helpRequest == "collect.single") {
                cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n";
                cout << "The collect.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster." << "\n";
-               cout << "The collect.single command parameters are label, line, freq, calc.  No parameters are required, but you may not use " << "\n";
+               cout << "The collect.single command parameters are label, line, freq, calc and abund.  No parameters are required, but you may not use " << "\n";
                cout << "both the line and label parameters at the same time. The collect.single command should be in the following format: " << "\n";
                cout << "collect.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n";
-               cout << "Example collect(label=unique-.01-.03, line=0,5,10, iters=10000, freq=10, calc=sobs-chao-ace-jack)." << "\n";
+               cout << "Example collect(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-chao-ace-jack)." << "\n";
                cout << "The default values for freq is 100, and calc are sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson." << "\n";
                cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "collect.shared") {
                cout << "The collect.shared command can only be executed after a successful read.otu command." << "\n";
-               cout << "The collect.shared command parameters are label, line, freq, jumble, calc.  No parameters are required, but you may not use " << "\n";
+               cout << "The collect.shared command parameters are label, line, freq, jumble, calc and groups.  No parameters are required, but you may not use " << "\n";
                cout << "both the line and label parameters at the same time. The collect.shared command should be in the following format: " << "\n";
-               cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, jumble=yourJumble, calc=yourEstimators)." << "\n";
-               cout << "Example collect.shared(label=unique-.01-.03, line=0,5,10, freq=10, jumble=1, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n";
+               cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n";
+               cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, jumble=1, groups=B-C, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n";
                cout << "The default values for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble), freq is 100 and calc are sharedsobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN." << "\n";
+               cout << "The default value for groups is all the groups in your groupfile." << "\n";
                cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
+               cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "get.group") {
                cout << "The get.group command can only be executed after a successful read.otu command of a group file." << "\n";
@@ -109,26 +111,28 @@ int HelpCommand::execute(){
        }else if (globaldata->helpRequest == "rarefaction.single") {
                cout << "The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n";
                cout << "The rarefaction.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster." << "\n";
-               cout << "The rarefaction.single command parameters are label, line, iters, freq, calc.  No parameters are required, but you may not use " << "\n";
+               cout << "The rarefaction.single command parameters are label, line, iters, freq, calc and abund.  No parameters are required, but you may not use " << "\n";
                cout << "both the line and label parameters at the same time. The rarefaction.single command should be in the following format: " << "\n";
                cout << "rarefaction.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n";
-               cout << "Example rarefaction.single(label=unique-.01-.03, line=0,5,10, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson)." << "\n";
+               cout << "Example rarefaction.single(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson)." << "\n";
                cout << "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness." << "\n";
                cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "rarefaction.shared") {
                cout << "The rarefaction.shared command can only be executed after a successful read.otu command." << "\n";
-               cout << "The rarefaction.shared command parameters are label, line, iters, jumble and calc.  No parameters are required, but you may not use " << "\n";
+               cout << "The rarefaction.shared command parameters are label, line, iters, jumble, groups and calc.  No parameters are required, but you may not use " << "\n";
                cout << "both the line and label parameters at the same time. The rarefaction command should be in the following format: " << "\n";
-               cout << "rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, jumble=yourJumble, calc=yourEstimators)." << "\n";
-               cout << "Example rarefaction.shared(label=unique-.01-.03, line=0,5,10, iters=10000, jumble=1, calc=sharedobserved)." << "\n";
+               cout << "rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n";
+               cout << "Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, jumble=1, groups=B-C, calc=sharedobserved)." << "\n";
                cout << "The default values for iters is 1000, jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble), freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness." << "\n";
+               cout << "The default value for groups is all the groups in your groupfile." << "\n";
                cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
+               cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "summary.single") { 
                cout << "The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n";
                cout << "The summary.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster." << "\n";
-               cout << "The summary.single command parameters are label, line, calc.  No parameters are required, but you may not use " << "\n";
+               cout << "The summary.single command parameters are label, line, calc, abund.  No parameters are required, but you may not use " << "\n";
                cout << "both the line and label parameters at the same time. The summary.single command should be in the following format: " << "\n";
                cout << "summary.single(label=yourLabel, line=yourLines, calc=yourEstimators)." << "\n";
                cout << "Example summary.single(label=unique-.01-.03, line=0,5,10, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson)." << "\n";
@@ -139,10 +143,12 @@ int HelpCommand::execute(){
                cout << "The summary.shared command can only be executed after a successful read.otu command." << "\n";
                cout << "The summary.shared command parameters are label, line, jumble and calc.  No parameters are required, but you may not use " << "\n";
                cout << "both the line and label parameters at the same time. The summary.shared command should be in the following format: " << "\n";
-               cout << "summary.shared(label=yourLabel, line=yourLines, jumble=yourJumble, calc=yourEstimators)." << "\n";
-               cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, jumble=1, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n";
+               cout << "summary.shared(label=yourLabel, line=yourLines, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n";
+               cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, jumble=1, groups=B-C, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n";
                cout << "The default value for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble) and calc is sharedsobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN" << "\n";
+               cout << "The default value for groups is all the groups in your groupfile." << "\n";
                cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
+               cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "parsimony") { 
                cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n";
index 7d08adf8ad3b0a0ad5a25bc98bbf74d1bacda3b9..9a8706e82499fb80f9c2a55d61bc92c7b2399839 100644 (file)
@@ -75,7 +75,9 @@ EstOutput Parsimony::getValues(Tree* t) {
                        if (numGroups == 0) {
                                //get score for all users groups
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                       groups.push_back(tmap->namesOfGroups[i]);
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               groups.push_back(tmap->namesOfGroups[i]);
+                                       }
                                }
                        }else {
                                for (int i = 0; i < globaldata->Groups.size(); i++) {
index 190b7d6ce1b913c733e52b974451cb9b3d81664b..86f8d013c59449b96009ba07859be7e0985cc141 100644 (file)
@@ -352,9 +352,11 @@ void ParsimonyCommand::setGroups() {
                                if (globaldata->Groups.size() == 0) { 
                                        cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
                                        for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
-                                               numGroups++;
-                                               allGroups += tmap->namesOfGroups[i] + "-";
+                                               if (tmap->namesOfGroups[i] != "xxx") {
+                                                       allGroups += tmap->namesOfGroups[i] + "-";
+                                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                                       numGroups++;
+                                               }
                                        }
                                        allGroups = allGroups.substr(0, allGroups.length()-1);
                                }else {
@@ -367,16 +369,20 @@ void ParsimonyCommand::setGroups() {
                        }else{//user has enter "all" and wants the default groups
                                globaldata->Groups.clear();
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
-                                       numGroups++;
-                                       allGroups += tmap->namesOfGroups[i] + "-";
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                               numGroups++;
+                                               allGroups += tmap->namesOfGroups[i] + "-";
+                                       }
                                }
                                allGroups = allGroups.substr(0, allGroups.length()-1);
                                globaldata->setGroups("");
                        }
                }else {
                        for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                               allGroups += tmap->namesOfGroups[i] + "-";
+                               if (tmap->namesOfGroups[i] != "xxx") {
+                                       allGroups += tmap->namesOfGroups[i] + "-";
+                               }
                        }
                        allGroups = allGroups.substr(0, allGroups.length()-1);
                        numGroups = 1;
index 029164452c49d2247bd97262369a649cfbd3d190..d3790df170ab0651c24987bc3541b62f48197c5f 100644 (file)
@@ -100,11 +100,9 @@ float ReadTree::readBranchLength(istream& f) {
        }               
 }
 
-
 /***********************************************************************/
 /***********************************************************************/
 
-
 //Child Classes Below
 
 /***********************************************************************/
@@ -113,12 +111,25 @@ float ReadTree::readBranchLength(istream& f) {
 
 int ReadNewickTree::read() {
        try {
+               holder = "";
                int c, error;
                int comment = 0;
                
                //if you are not a nexus file 
                if ((c = filehandle.peek()) != '#') {  
                        while((c = filehandle.peek()) != EOF) { 
+                               while ((c = filehandle.peek()) != EOF) {
+                                       // get past comments
+                                       if(c == '[') {
+                                               comment = 1;
+                                       }
+                                       if(c == ']'){
+                                               comment = 0;
+                                       }
+                                       if((c == '(') && (comment != 1)){ break; }
+                                       filehandle.get();
+                               }
+
                                //make new tree
                                T = new Tree(); 
                                numNodes = T->getNumNodes();
@@ -164,6 +175,8 @@ int ReadNewickTree::read() {
                                globaldata->gTree.push_back(T); 
                        }
                }
+               
+               if (error != 0) { readOk = error; } 
                return readOk;
        }
        catch(exception& e) {
@@ -236,7 +249,7 @@ int ReadNewickTree::readTreeString() {
                        n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
 
                        lc = readNewickInt(filehandle, n, T);
-                       if (lc == -1) { return -1; } //reports an error in reading
+                       if (lc == -1) { cout << "error with lc" << endl; return -1; } //reports an error in reading
                
                        if(filehandle.peek()==','){                                                     
                                readSpecialChar(filehandle,',',"comma");
@@ -247,7 +260,7 @@ int ReadNewickTree::readTreeString() {
                        }                                                                                               
                        if(rooted != 1){                                                                
                                rc = readNewickInt(filehandle, n, T);
-                               if (rc == -1) { return -1; } //reports an error in reading
+                               if (rc == -1) { cout << "error with rc" << endl; return -1; } //reports an error in reading
                                if(filehandle.peek() == ')'){                                   
                                        readSpecialChar(filehandle,')',"right parenthesis");
                                }                                                                                       
@@ -331,7 +344,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                        }
                
                        int blen = 0;
-                       if(d == ':')    {               blen = 1;                       }               
+                       if(d == ':')    {               blen = 1;       }               
                
                        f.putback(d);
                
@@ -342,27 +355,22 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                        int n1 = T->getIndex(name);
                        
                        //adds sequence names that are not in group file to the "xxx" group
-                       if(n1 == -1) {
-                               cerr << "Name: " << name << " not found in your groupfile. \n"; readOk = -1; return n1;
+                       if(group == "not found") {
+                               cout << "Name: " << name << " is not in your groupfile, and will be disregarded. \n";  //readOk = -1; return n1;
                                
-                               //globaldata->gTreemap->namesOfSeqs.push_back(name);
-                               //globaldata->gTreemap->treemap[name].groupname = "xxx";
-                               //globaldata->gTreemap->treemap[name].vectorIndex = (globaldata->gTreemap->namesOfSeqs.size() - 1);
+                               globaldata->gTreemap->namesOfSeqs.push_back(name);
+                               globaldata->gTreemap->treemap[name].groupname = "xxx";
                                
-                               //map<string, int>::iterator it;
-                               //it = globaldata->gTreemap->seqsPerGroup.find("xxx");
-                               //if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
-                               //      globaldata->gTreemap->namesOfGroups.push_back("xxx");
-                               //      globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
-                               //}else {
-                               //      globaldata->gTreemap->seqsPerGroup["xxx"]++;
-                               //}
+                               map<string, int>::iterator it;
+                               it = globaldata->gTreemap->seqsPerGroup.find("xxx");
+                               if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
+                                       globaldata->gTreemap->namesOfGroups.push_back("xxx");
+                                       globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
+                               }else {
+                                       globaldata->gTreemap->seqsPerGroup["xxx"]++;
+                               }
                                
-                               //find index in tree of name
-                               //n1 = T->getIndex(name);
-                               //group = "xxx";
-                               //numLeaves++;
-                               //numNodes = 2*numLeaves - 1;
+                               group = "xxx";
                        }
                        
                        T->tree[n1].setGroup(group);
index 6af28ad7788a351e1a320aaf433c800c2c529f1b..19ab67531dc96e8929b258eef2a41b4d24538bc4 100644 (file)
@@ -22,6 +22,9 @@ ReadTreeCommand::ReadTreeCommand(){
                //memory leak prevention
                //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
                globaldata->gTreemap = treeMap;
+               
+               //get names in tree
+               globaldata->parseTreeFile();
 
                read = new ReadNewickTree(filename);
                
@@ -59,6 +62,23 @@ int ReadTreeCommand::execute(){
                        T[i]->assembleTree();
                }
 
+               //output any names that are in names file but not in tree
+               if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
+                       for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
+                               //is that name in the tree?
+                               int count = 0;
+                               for (int j = 0; j < globaldata->Treenames.size(); j++) {
+                                       if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
+                                       count++;
+                               }
+                               
+                               //then you did not find it so report it 
+                               if (count == globaldata->Treenames.size()) { 
+                                       cout << treeMap->namesOfSeqs[i] << " is in your namefile and not in your tree. It will be disregarded." << endl;
+                               }
+                       }
+               }
+               
                return 0;
        }
        catch(exception& e) {
index 22892e2cbd7c9738261d5632492eafe674e7471b..6aa1b829c91d59bcee681b906c6c31ab07f1c168 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -13,9 +13,9 @@
 /*****************************************************************/
 Tree::Tree() {
        try {
-       
                globaldata = GlobalData::getInstance();
-               numLeaves = globaldata->gTreemap->getNumSeqs();
+               
+               numLeaves = globaldata->Treenames.size();
                numNodes = 2*numLeaves - 1;
                
                tree.resize(numNodes);
@@ -24,13 +24,13 @@ Tree::Tree() {
                for (int i = 0; i < numNodes; i++) {
                        //initialize leaf nodes
                        if (i <= (numLeaves-1)) {
-                               tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
-                               tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
+                               tree[i].setName(globaldata->Treenames[i]);
+                               tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
                                //set pcount and pGroup for groupname to 1.
-                               tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
-                               tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
+                               tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
+                               tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
                                //Treemap knows name, group and index to speed up search
-                               globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
+                               globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
        
                        //intialize non leaf nodes
                        }else if (i > (numLeaves-1)) {
@@ -486,7 +486,7 @@ int Tree::findRoot() {
        try {
                for (int i = 0; i < numNodes; i++) {
                        //you found the root
-                       if (tree[i].getParent() == -1) { return i; }
+                       if (tree[i].getParent() == -1) { return i; }  
                }
                return -1;
        }
index 9eb8330ad1917a26441b8c22527b50652dbd011a..fee309c5877138039c2ecdff8f54428a8060bc24 100644 (file)
@@ -72,7 +72,13 @@ string TreeMap::getGroup(string sequenceName) {
 }
 /************************************************************/
 void TreeMap::setIndex(string seq, int index) {
-       treemap[seq].vectorIndex = index;
+       it = treemap.find(seq);
+       if (it != treemap.end()) { //sequence name was in group file
+               treemap[seq].vectorIndex = index;       
+       }else {
+               treemap[seq].vectorIndex = index;
+               treemap[seq].groupname = "not found";
+       }
 }
 /************************************************************/
 int TreeMap::getIndex(string seq) {
index 65f30ebc949f4945023fd295224d00d52827d82b..2b93df782bccc4d2dbc8533e7cdc38169059d6a7 100644 (file)
@@ -211,9 +211,11 @@ void UnifracUnweightedCommand::setGroups() {
                                if (globaldata->Groups.size() == 0) { 
                                        cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
                                        for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
-                                               numGroups++;
-                                               allGroups += tmap->namesOfGroups[i] + "-";
+                                               if (tmap->namesOfGroups[i] != "xxx") {
+                                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                                       numGroups++;
+                                                       allGroups += tmap->namesOfGroups[i] + "-";
+                                               }
                                        }
                                        allGroups = allGroups.substr(0, allGroups.length()-1);
                                }else {
@@ -226,16 +228,20 @@ void UnifracUnweightedCommand::setGroups() {
                        }else{//user has enter "all" and wants the default groups
                                globaldata->Groups.clear();
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
-                                       numGroups++;
-                                       allGroups += tmap->namesOfGroups[i] + "-";
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                               numGroups++;
+                                               allGroups += tmap->namesOfGroups[i] + "-";
+                                       }
                                }
                                allGroups = allGroups.substr(0, allGroups.length()-1);
                                globaldata->setGroups("");
                        }
                }else {
                        for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                               allGroups += tmap->namesOfGroups[i] + "-";
+                               if (tmap->namesOfGroups[i] != "xxx") {
+                                       allGroups += tmap->namesOfGroups[i] + "-";
+                               }
                        }
                        allGroups = allGroups.substr(0, allGroups.length()-1);
                        numGroups = 1;
index 0a4c7facec93938af068ebad350307ab6f5d4c0d..c2584eabfe07975d103c17c6d93e96e2ae1770e2 100644 (file)
@@ -227,11 +227,14 @@ int UnifracWeightedCommand::findIndex(float score, int index) {
 /***********************************************************/
 void UnifracWeightedCommand::setGroups() {
        try {
+               numGroups = 0;
                //if the user has not entered specific groups to analyze then do them all
                if (globaldata->Groups.size() == 0) {
-                       numGroups = tmap->getNumGroups();
-                       for (int i=0; i < numGroups; i++) { 
-                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                       for (int i=0; i < tmap->getNumGroups(); i++) { 
+                               if (tmap->namesOfGroups[i] != "xxx") {
+                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                       numGroups++;
+                               }
                        }
                }else {
                        if (globaldata->getGroups() != "all") {
@@ -246,25 +249,31 @@ void UnifracWeightedCommand::setGroups() {
                        
                                //if the user only entered invalid groups
                                if (globaldata->Groups.size() == 0) { 
-                                       numGroups = tmap->getNumGroups();
-                                       for (int i=0; i < numGroups; i++) { 
-                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                       for (int i=0; i < tmap->getNumGroups(); i++) { 
+                                               if (tmap->namesOfGroups[i] != "xxx") {
+                                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                                       numGroups++;
+                                               }
                                        }
                                        cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
                                }else if (globaldata->Groups.size() == 1) { 
                                        cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
-                                       numGroups = tmap->getNumGroups();
                                        globaldata->Groups.clear();
-                                       for (int i=0; i < numGroups; i++) { 
-                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                       for (int i=0; i < tmap->getNumGroups(); i++) { 
+                                               if (tmap->namesOfGroups[i] != "xxx") {
+                                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                                       numGroups++;
+                                               }
                                        }
                                }else { numGroups = globaldata->Groups.size(); }
                        }else { //users wants all groups
-                               numGroups = tmap->getNumGroups();
                                globaldata->Groups.clear();
                                globaldata->setGroups("");
-                               for (int i=0; i < numGroups; i++) { 
-                                       globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                               for (int i=0; i < tmap->getNumGroups(); i++) { 
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                               numGroups++;
+                                       }
                                }
                        }
                }
index 3694d263fb1039ea5208926301b7bd18a38875a3..b3f50c8895371e326b0e3c72614e4c1c4f92888b 100644 (file)
@@ -109,7 +109,9 @@ EstOutput Unweighted::getValues(Tree* t) {
                        if (numGroups == 0) {
                                //get score for all users groups
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                       groups.push_back(tmap->namesOfGroups[i]);
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               groups.push_back(tmap->namesOfGroups[i]);
+                                       }
                                }
                        }else {
                                for (int i = 0; i < globaldata->Groups.size(); i++) {
@@ -297,7 +299,9 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                        if (numGroups == 0) {
                                //get score for all users groups
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
-                                       groups.push_back(tmap->namesOfGroups[i]);
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               groups.push_back(tmap->namesOfGroups[i]);
+                                       }
                                }
                        }else {
                                for (int i = 0; i < globaldata->Groups.size(); i++) {
index cb6db162b5ea52262fa51fc520ed6a41f99cb30b..f4780a710a40a0c9ea5d621f6f354c2503f6196a 100644 (file)
@@ -229,10 +229,10 @@ void ValidParameters::initCommandParameters() {
        try {   
                //{"parameter1","parameter2",...,"last parameter"};
                
-               string readdistArray[] = {"phylip","column", "name","cutoff","precision"};
+               string readdistArray[] = {"phylip","column","name","cutoff","precision","group"};
                commandParameters["read.dist"] = addParameters(readdistArray, sizeof(readdistArray)/sizeof(string));
 
-               string readotuArray[] =  {"list","order","shared", "line", "label","group","shared", "sabund", "rabund"};
+               string readotuArray[] =  {"list","order","shared", "line", "label","group","sabund", "rabund"};
                commandParameters["read.otu"] = addParameters(readotuArray, sizeof(readotuArray)/sizeof(string));
                
                string readtreeArray[] = {"tree","group"};
@@ -244,7 +244,7 @@ void ValidParameters::initCommandParameters() {
                string deconvoluteArray[] =  {"fasta"};
                commandParameters["deconvolute"] = addParameters(deconvoluteArray, sizeof(deconvoluteArray)/sizeof(string));
                
-               string collectsingleArray[] =  {"freq","line","label","calc","precision","abund"};
+               string collectsingleArray[] =  {"freq","line","label","calc","abund"};
                commandParameters["collect.single"] = addParameters(collectsingleArray, sizeof(collectsingleArray)/sizeof(string));
 
                string collectsharedArray[] =  {"jumble","freq","line","label","calc","groups"};
@@ -262,7 +262,7 @@ void ValidParameters::initCommandParameters() {
                string rarefactionsingleArray[] =  {"iters","freq","line","label","calc","abund"};
                commandParameters["rarefaction.single"] = addParameters(rarefactionsingleArray, sizeof(rarefactionsingleArray)/sizeof(string));
 
-               string rarefactionsharedArray[] =  {"iters","jumble","line","label","calc"};
+               string rarefactionsharedArray[] =  {"iters","jumble","line","label","calc","groups"};
                commandParameters["rarefaction.shared"] = addParameters(rarefactionsharedArray, sizeof(rarefactionsharedArray)/sizeof(string));
                
                string libshuffArray[] =  {"iters","groups","step","form","cutoff"};
@@ -271,19 +271,19 @@ void ValidParameters::initCommandParameters() {
                string summarysingleArray[] =  {"line","label","calc","abund"};
                commandParameters["summary.single"] = addParameters(summarysingleArray, sizeof(summarysingleArray)/sizeof(string));
 
-               string summarysharedArray[] =  {"jumble","line","label","calc"};
+               string summarysharedArray[] =  {"jumble","line","label","calc","groups"};
                commandParameters["summary.shared"] = addParameters(summarysharedArray, sizeof(summarysharedArray)/sizeof(string));
 
-               string parsimonyArray[] =  {"random","group","iters"};
+               string parsimonyArray[] =  {"random","groups","iters"};
                commandParameters["parsimony"] = addParameters(parsimonyArray, sizeof(parsimonyArray)/sizeof(string));
 
-               string unifracWeightedArray[] =  {"group","iters"};
+               string unifracWeightedArray[] =  {"groups","iters"};
                commandParameters["unifrac.weighted"] = addParameters(unifracWeightedArray, sizeof(unifracWeightedArray)/sizeof(string));
 
-               string unifracUnweightedArray[] =  {"group","iters"};
+               string unifracUnweightedArray[] =  {"groups","iters"};
                commandParameters["unifrac.unweighted"] = addParameters(unifracUnweightedArray, sizeof(unifracUnweightedArray)/sizeof(string));
 
-               string heatmapArray[] =  {"group","line","label","sorted"};
+               string heatmapArray[] =  {"groups","line","label","sorted"};
                commandParameters["heatmap"] = addParameters(heatmapArray, sizeof(heatmapArray)/sizeof(string));
 
                string vennArray[] =  {"groups","line","label","calc"};
@@ -339,8 +339,8 @@ void ValidParameters::initParameterRanges() {
                string freqArray[] = {">","1", "<","NA", "between"};
                parameterRanges["freq"] = addParameters(freqArray, rangeSize);
 
-               string lineArray[] = {">=","1", "<","NA", "between"};
-               parameterRanges["line"] = addParameters(lineArray, rangeSize);
+               //string lineArray[] = {">=","1", "<","NA", "between"};
+               //parameterRanges["line"] = addParameters(lineArray, rangeSize);
 
                string abundArray[] = {">=","5", "<","NA", "between"};
                parameterRanges["abund"] = addParameters(abundArray, rangeSize);