From 2e5ec5cbbd23637ed20da9bdd544d178a3b5d949 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 8 Apr 2009 15:23:31 +0000 Subject: [PATCH] fixed bug in read.tree updates help and validparameters strings, added ability for user to leave names out of the groupfile or add extra names with read.tree. --- globaldata.cpp | 142 +++++++++++++++++++++++++++++++++++ globaldata.hpp | 8 ++ helpcommand.cpp | 32 ++++---- parsimony.cpp | 4 +- parsimonycommand.cpp | 20 +++-- readtree.cpp | 54 +++++++------ readtreecommand.cpp | 20 +++++ tree.cpp | 16 ++-- treemap.cpp | 8 +- unifracunweightedcommand.cpp | 20 +++-- unifracweightedcommand.cpp | 33 +++++--- unweighted.cpp | 8 +- validparameter.cpp | 22 +++--- 13 files changed, 302 insertions(+), 85 deletions(-) diff --git a/globaldata.cpp b/globaldata.cpp index da81879..67c49bd 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -30,6 +30,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ clear(); gGroupmap = NULL; gTree.clear(); + Treenames.clear(); labels.clear(); lines.clear(); groups.clear(); allLines = 1; } @@ -325,3 +326,144 @@ GlobalData::~GlobalData() { if(gorder != NULL) { delete gorder; } } /*******************************************************/ + +/*******************************************************/ +void GlobalData::parseTreeFile() { + //only takes names from the first tree and assumes that all trees use the same names. + try { + string filename = treefile; + ifstream filehandle; + openInputFile(filename, filehandle); + int c, comment; + comment = 0; + + //if you are not a nexus file + if ((c = filehandle.peek()) != '#') { + while((c = filehandle.peek()) != ';') { + while ((c = filehandle.peek()) != ';') { + // get past comments + if(c == '[') { + comment = 1; + } + if(c == ']'){ + comment = 0; + } + if((c == '(') && (comment != 1)){ break; } + filehandle.get(); + } + + readTreeString(filehandle); + } + //if you are a nexus file + }else if ((c = filehandle.peek()) == '#') { + string holder = ""; + + // get past comments + while(holder != "translate" && holder != "Translate"){ + if(holder == "[" || holder == "[!"){ + comment = 1; + } + if(holder == "]"){ + comment = 0; + } + filehandle >> holder; + + //if there is no translate then you must read tree string otherwise use translate to get names + if(holder == "tree" && comment != 1){ + //pass over the "tree rep.6878900 = " + while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;} + + if (c == EOF ) { break; } + filehandle.putback(c); //put back first ( of tree. + readTreeString(filehandle); + break; + } + } + + //use nexus translation rather than parsing tree to save time + if ((holder == "translate") || (holder == "Translate")) { +cout << "there is a translate " << endl; + string number, name, h; + h = ""; // so it enters the loop the first time + while((h != ";") && (number != ";")) { + filehandle >> number; + filehandle >> name; + + //c = , until done with translation then c = ; + h = name.substr(name.length()-1, name.length()); + name.erase(name.end()-1); //erase the comma + Treenames.push_back(number); + } + if (number == ";") { Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name + } + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GlobalData class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************/ + +/*******************************************************/ +void GlobalData::readTreeString(ifstream& filehandle) { + try { + int c; + string name; //k + + while((c = filehandle.peek()) != ';') { + //if you are a name + if ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space + name = ""; + c = filehandle.get(); + // k = c; +//cout << k << endl; + while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) { + name += c; + c = filehandle.get(); + // k = c; +//cout << " in name while " << k << endl; + } + +//cout << "name = " << name << endl; + Treenames.push_back(name); + filehandle.putback(c); +//k = c; +//cout << " after putback" << k << endl; + } + + if (c == ':') { //read until you reach the end of the branch length + while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { + c = filehandle.get(); + // k = c; + //cout << " in branch while " << k << endl; + } + filehandle.putback(c); + } + c = filehandle.get(); + if (c == ';') { break; } + // k = c; +//cout << k << endl; + + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GlobalData class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/*******************************************************/ + +/*******************************************************/ + + diff --git a/globaldata.hpp b/globaldata.hpp index f74619e..0abdd28 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -37,6 +37,7 @@ public: vector Estimators, Groups; //holds estimators to be used set lines; //hold lines to be used set labels; //holds labels to be used + vector Treenames; string getPhylipFile(); string getColumnFile(); @@ -80,6 +81,12 @@ public: void clearAbund(); void parseGlobalData(string, string); + + void parseTreeFile(); //parses through tree file to find names of nodes and number of them + //this is required in case user has sequences in the names file that are + //not included in the tree. + //only takes names from the first tree in the tree file and assumes that all trees use the same names. + private: string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups; @@ -91,6 +98,7 @@ private: GlobalData(); ~GlobalData(); void reset(); //clears all non filename parameters + void readTreeString(ifstream&); diff --git a/helpcommand.cpp b/helpcommand.cpp index 8e28cde..05aaff0 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -72,21 +72,23 @@ int HelpCommand::execute(){ }else if (globaldata->helpRequest == "collect.single") { cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n"; cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; - cout << "The collect.single command parameters are label, line, freq, calc. No parameters are required, but you may not use " << "\n"; + cout << "The collect.single command parameters are label, line, freq, calc and abund. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The collect.single command should be in the following format: " << "\n"; cout << "collect.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n"; - cout << "Example collect(label=unique-.01-.03, line=0,5,10, iters=10000, freq=10, calc=sobs-chao-ace-jack)." << "\n"; + cout << "Example collect(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-chao-ace-jack)." << "\n"; cout << "The default values for freq is 100, and calc are sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson." << "\n"; cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; }else if (globaldata->helpRequest == "collect.shared") { cout << "The collect.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The collect.shared command parameters are label, line, freq, jumble, calc. No parameters are required, but you may not use " << "\n"; + cout << "The collect.shared command parameters are label, line, freq, jumble, calc and groups. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The collect.shared command should be in the following format: " << "\n"; - cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, jumble=yourJumble, calc=yourEstimators)." << "\n"; - cout << "Example collect.shared(label=unique-.01-.03, line=0,5,10, freq=10, jumble=1, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n"; + cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, jumble=1, groups=B-C, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n"; cout << "The default values for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble), freq is 100 and calc are sharedsobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n"; }else if (globaldata->helpRequest == "get.group") { cout << "The get.group command can only be executed after a successful read.otu command of a group file." << "\n"; @@ -109,26 +111,28 @@ int HelpCommand::execute(){ }else if (globaldata->helpRequest == "rarefaction.single") { cout << "The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n"; cout << "The rarefaction.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; - cout << "The rarefaction.single command parameters are label, line, iters, freq, calc. No parameters are required, but you may not use " << "\n"; + cout << "The rarefaction.single command parameters are label, line, iters, freq, calc and abund. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The rarefaction.single command should be in the following format: " << "\n"; cout << "rarefaction.single(label=yourLabel, line=yourLines, iters=yourIters, freq=yourFreq, calc=yourEstimators)." << "\n"; - cout << "Example rarefaction.single(label=unique-.01-.03, line=0,5,10, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson)." << "\n"; + cout << "Example rarefaction.single(label=unique-.01-.03, line=0-5-10, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson)." << "\n"; cout << "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness." << "\n"; cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; }else if (globaldata->helpRequest == "rarefaction.shared") { cout << "The rarefaction.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The rarefaction.shared command parameters are label, line, iters, jumble and calc. No parameters are required, but you may not use " << "\n"; + cout << "The rarefaction.shared command parameters are label, line, iters, jumble, groups and calc. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The rarefaction command should be in the following format: " << "\n"; - cout << "rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, jumble=yourJumble, calc=yourEstimators)." << "\n"; - cout << "Example rarefaction.shared(label=unique-.01-.03, line=0,5,10, iters=10000, jumble=1, calc=sharedobserved)." << "\n"; + cout << "rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, jumble=1, groups=B-C, calc=sharedobserved)." << "\n"; cout << "The default values for iters is 1000, jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble), freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness." << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; }else if (globaldata->helpRequest == "summary.single") { cout << "The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION." << "\n"; cout << "The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; - cout << "The summary.single command parameters are label, line, calc. No parameters are required, but you may not use " << "\n"; + cout << "The summary.single command parameters are label, line, calc, abund. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The summary.single command should be in the following format: " << "\n"; cout << "summary.single(label=yourLabel, line=yourLines, calc=yourEstimators)." << "\n"; cout << "Example summary.single(label=unique-.01-.03, line=0,5,10, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson)." << "\n"; @@ -139,10 +143,12 @@ int HelpCommand::execute(){ cout << "The summary.shared command can only be executed after a successful read.otu command." << "\n"; cout << "The summary.shared command parameters are label, line, jumble and calc. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The summary.shared command should be in the following format: " << "\n"; - cout << "summary.shared(label=yourLabel, line=yourLines, jumble=yourJumble, calc=yourEstimators)." << "\n"; - cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, jumble=1, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n"; + cout << "summary.shared(label=yourLabel, line=yourLines, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, jumble=1, groups=B-C, calc=sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN)." << "\n"; cout << "The default value for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble) and calc is sharedsobs-sharedChao-sharedAce-sharedJabund-sharedSorensonAbund-sharedJclass-sharedSorClass-sharedJest-sharedSorEst-SharedThetaYC-SharedThetaN" << "\n"; + cout << "The default value for groups is all the groups in your groupfile." << "\n"; cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; + cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; }else if (globaldata->helpRequest == "parsimony") { cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n"; diff --git a/parsimony.cpp b/parsimony.cpp index 7d08adf..9a8706e 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -75,7 +75,9 @@ EstOutput Parsimony::getValues(Tree* t) { if (numGroups == 0) { //get score for all users groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - groups.push_back(tmap->namesOfGroups[i]); + if (tmap->namesOfGroups[i] != "xxx") { + groups.push_back(tmap->namesOfGroups[i]); + } } }else { for (int i = 0; i < globaldata->Groups.size(); i++) { diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 190b7d6..86f8d01 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -352,9 +352,11 @@ void ParsimonyCommand::setGroups() { if (globaldata->Groups.size() == 0) { cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); - numGroups++; - allGroups += tmap->namesOfGroups[i] + "-"; + if (tmap->namesOfGroups[i] != "xxx") { + allGroups += tmap->namesOfGroups[i] + "-"; + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + } } allGroups = allGroups.substr(0, allGroups.length()-1); }else { @@ -367,16 +369,20 @@ void ParsimonyCommand::setGroups() { }else{//user has enter "all" and wants the default groups globaldata->Groups.clear(); for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); - numGroups++; - allGroups += tmap->namesOfGroups[i] + "-"; + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i] + "-"; + } } allGroups = allGroups.substr(0, allGroups.length()-1); globaldata->setGroups(""); } }else { for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - allGroups += tmap->namesOfGroups[i] + "-"; + if (tmap->namesOfGroups[i] != "xxx") { + allGroups += tmap->namesOfGroups[i] + "-"; + } } allGroups = allGroups.substr(0, allGroups.length()-1); numGroups = 1; diff --git a/readtree.cpp b/readtree.cpp index 0291644..d3790df 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -100,11 +100,9 @@ float ReadTree::readBranchLength(istream& f) { } } - /***********************************************************************/ /***********************************************************************/ - //Child Classes Below /***********************************************************************/ @@ -113,12 +111,25 @@ float ReadTree::readBranchLength(istream& f) { int ReadNewickTree::read() { try { + holder = ""; int c, error; int comment = 0; //if you are not a nexus file if ((c = filehandle.peek()) != '#') { while((c = filehandle.peek()) != EOF) { + while ((c = filehandle.peek()) != EOF) { + // get past comments + if(c == '[') { + comment = 1; + } + if(c == ']'){ + comment = 0; + } + if((c == '(') && (comment != 1)){ break; } + filehandle.get(); + } + //make new tree T = new Tree(); numNodes = T->getNumNodes(); @@ -164,6 +175,8 @@ int ReadNewickTree::read() { globaldata->gTree.push_back(T); } } + + if (error != 0) { readOk = error; } return readOk; } catch(exception& e) { @@ -236,7 +249,7 @@ int ReadNewickTree::readTreeString() { n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off lc = readNewickInt(filehandle, n, T); - if (lc == -1) { return -1; } //reports an error in reading + if (lc == -1) { cout << "error with lc" << endl; return -1; } //reports an error in reading if(filehandle.peek()==','){ readSpecialChar(filehandle,',',"comma"); @@ -247,7 +260,7 @@ int ReadNewickTree::readTreeString() { } if(rooted != 1){ rc = readNewickInt(filehandle, n, T); - if (rc == -1) { return -1; } //reports an error in reading + if (rc == -1) { cout << "error with rc" << endl; return -1; } //reports an error in reading if(filehandle.peek() == ')'){ readSpecialChar(filehandle,')',"right parenthesis"); } @@ -331,7 +344,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { } int blen = 0; - if(d == ':') { blen = 1; } + if(d == ':') { blen = 1; } f.putback(d); @@ -342,27 +355,22 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { int n1 = T->getIndex(name); //adds sequence names that are not in group file to the "xxx" group - if(n1 == -1) { - cerr << "Name: " << name << " not found in your groupfile. \n"; readOk = -1; return n1; + if(group == "not found") { + cout << "Name: " << name << " is not in your groupfile, and will be disregarded. \n"; //readOk = -1; return n1; - //globaldata->gTreemap->namesOfSeqs.push_back(name); - //globaldata->gTreemap->treemap[name].groupname = "xxx"; - //globaldata->gTreemap->treemap[name].vectorIndex = (globaldata->gTreemap->namesOfSeqs.size() - 1); + globaldata->gTreemap->namesOfSeqs.push_back(name); + globaldata->gTreemap->treemap[name].groupname = "xxx"; - //map::iterator it; - //it = globaldata->gTreemap->seqsPerGroup.find("xxx"); - //if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group - // globaldata->gTreemap->namesOfGroups.push_back("xxx"); - // globaldata->gTreemap->seqsPerGroup["xxx"] = 1; - //}else { - // globaldata->gTreemap->seqsPerGroup["xxx"]++; - //} + map::iterator it; + it = globaldata->gTreemap->seqsPerGroup.find("xxx"); + if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group + globaldata->gTreemap->namesOfGroups.push_back("xxx"); + globaldata->gTreemap->seqsPerGroup["xxx"] = 1; + }else { + globaldata->gTreemap->seqsPerGroup["xxx"]++; + } - //find index in tree of name - //n1 = T->getIndex(name); - //group = "xxx"; - //numLeaves++; - //numNodes = 2*numLeaves - 1; + group = "xxx"; } T->tree[n1].setGroup(group); diff --git a/readtreecommand.cpp b/readtreecommand.cpp index 6af28ad..19ab675 100644 --- a/readtreecommand.cpp +++ b/readtreecommand.cpp @@ -22,6 +22,9 @@ ReadTreeCommand::ReadTreeCommand(){ //memory leak prevention //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; } globaldata->gTreemap = treeMap; + + //get names in tree + globaldata->parseTreeFile(); read = new ReadNewickTree(filename); @@ -59,6 +62,23 @@ int ReadTreeCommand::execute(){ T[i]->assembleTree(); } + //output any names that are in names file but not in tree + if (globaldata->Treenames.size() < treeMap->getNumSeqs()) { + for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) { + //is that name in the tree? + int count = 0; + for (int j = 0; j < globaldata->Treenames.size(); j++) { + if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it + count++; + } + + //then you did not find it so report it + if (count == globaldata->Treenames.size()) { + cout << treeMap->namesOfSeqs[i] << " is in your namefile and not in your tree. It will be disregarded." << endl; + } + } + } + return 0; } catch(exception& e) { diff --git a/tree.cpp b/tree.cpp index 22892e2..6aa1b82 100644 --- a/tree.cpp +++ b/tree.cpp @@ -13,9 +13,9 @@ /*****************************************************************/ Tree::Tree() { try { - globaldata = GlobalData::getInstance(); - numLeaves = globaldata->gTreemap->getNumSeqs(); + + numLeaves = globaldata->Treenames.size(); numNodes = 2*numLeaves - 1; tree.resize(numNodes); @@ -24,13 +24,13 @@ Tree::Tree() { for (int i = 0; i < numNodes; i++) { //initialize leaf nodes if (i <= (numLeaves-1)) { - tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]); - tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])); + tree[i].setName(globaldata->Treenames[i]); + tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i])); //set pcount and pGroup for groupname to 1. - tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1; - tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1; + tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1; + tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1; //Treemap knows name, group and index to speed up search - globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i); + globaldata->gTreemap->setIndex(globaldata->Treenames[i], i); //intialize non leaf nodes }else if (i > (numLeaves-1)) { @@ -486,7 +486,7 @@ int Tree::findRoot() { try { for (int i = 0; i < numNodes; i++) { //you found the root - if (tree[i].getParent() == -1) { return i; } + if (tree[i].getParent() == -1) { return i; } } return -1; } diff --git a/treemap.cpp b/treemap.cpp index 9eb8330..fee309c 100644 --- a/treemap.cpp +++ b/treemap.cpp @@ -72,7 +72,13 @@ string TreeMap::getGroup(string sequenceName) { } /************************************************************/ void TreeMap::setIndex(string seq, int index) { - treemap[seq].vectorIndex = index; + it = treemap.find(seq); + if (it != treemap.end()) { //sequence name was in group file + treemap[seq].vectorIndex = index; + }else { + treemap[seq].vectorIndex = index; + treemap[seq].groupname = "not found"; + } } /************************************************************/ int TreeMap::getIndex(string seq) { diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 65f30eb..2b93df7 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -211,9 +211,11 @@ void UnifracUnweightedCommand::setGroups() { if (globaldata->Groups.size() == 0) { cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); - numGroups++; - allGroups += tmap->namesOfGroups[i] + "-"; + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i] + "-"; + } } allGroups = allGroups.substr(0, allGroups.length()-1); }else { @@ -226,16 +228,20 @@ void UnifracUnweightedCommand::setGroups() { }else{//user has enter "all" and wants the default groups globaldata->Groups.clear(); for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); - numGroups++; - allGroups += tmap->namesOfGroups[i] + "-"; + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i] + "-"; + } } allGroups = allGroups.substr(0, allGroups.length()-1); globaldata->setGroups(""); } }else { for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - allGroups += tmap->namesOfGroups[i] + "-"; + if (tmap->namesOfGroups[i] != "xxx") { + allGroups += tmap->namesOfGroups[i] + "-"; + } } allGroups = allGroups.substr(0, allGroups.length()-1); numGroups = 1; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 0a4c7fa..c2584ea 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -227,11 +227,14 @@ int UnifracWeightedCommand::findIndex(float score, int index) { /***********************************************************/ void UnifracWeightedCommand::setGroups() { try { + numGroups = 0; //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() == 0) { - numGroups = tmap->getNumGroups(); - for (int i=0; i < numGroups; i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + for (int i=0; i < tmap->getNumGroups(); i++) { + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + } } }else { if (globaldata->getGroups() != "all") { @@ -246,25 +249,31 @@ void UnifracWeightedCommand::setGroups() { //if the user only entered invalid groups if (globaldata->Groups.size() == 0) { - numGroups = tmap->getNumGroups(); - for (int i=0; i < numGroups; i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + for (int i=0; i < tmap->getNumGroups(); i++) { + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + } } cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; }else if (globaldata->Groups.size() == 1) { cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; - numGroups = tmap->getNumGroups(); globaldata->Groups.clear(); - for (int i=0; i < numGroups; i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + for (int i=0; i < tmap->getNumGroups(); i++) { + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + } } }else { numGroups = globaldata->Groups.size(); } }else { //users wants all groups - numGroups = tmap->getNumGroups(); globaldata->Groups.clear(); globaldata->setGroups(""); - for (int i=0; i < numGroups; i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + for (int i=0; i < tmap->getNumGroups(); i++) { + if (tmap->namesOfGroups[i] != "xxx") { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + } } } } diff --git a/unweighted.cpp b/unweighted.cpp index 3694d26..b3f50c8 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -109,7 +109,9 @@ EstOutput Unweighted::getValues(Tree* t) { if (numGroups == 0) { //get score for all users groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - groups.push_back(tmap->namesOfGroups[i]); + if (tmap->namesOfGroups[i] != "xxx") { + groups.push_back(tmap->namesOfGroups[i]); + } } }else { for (int i = 0; i < globaldata->Groups.size(); i++) { @@ -297,7 +299,9 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { if (numGroups == 0) { //get score for all users groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - groups.push_back(tmap->namesOfGroups[i]); + if (tmap->namesOfGroups[i] != "xxx") { + groups.push_back(tmap->namesOfGroups[i]); + } } }else { for (int i = 0; i < globaldata->Groups.size(); i++) { diff --git a/validparameter.cpp b/validparameter.cpp index cb6db16..f4780a7 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -229,10 +229,10 @@ void ValidParameters::initCommandParameters() { try { //{"parameter1","parameter2",...,"last parameter"}; - string readdistArray[] = {"phylip","column", "name","cutoff","precision"}; + string readdistArray[] = {"phylip","column","name","cutoff","precision","group"}; commandParameters["read.dist"] = addParameters(readdistArray, sizeof(readdistArray)/sizeof(string)); - string readotuArray[] = {"list","order","shared", "line", "label","group","shared", "sabund", "rabund"}; + string readotuArray[] = {"list","order","shared", "line", "label","group","sabund", "rabund"}; commandParameters["read.otu"] = addParameters(readotuArray, sizeof(readotuArray)/sizeof(string)); string readtreeArray[] = {"tree","group"}; @@ -244,7 +244,7 @@ void ValidParameters::initCommandParameters() { string deconvoluteArray[] = {"fasta"}; commandParameters["deconvolute"] = addParameters(deconvoluteArray, sizeof(deconvoluteArray)/sizeof(string)); - string collectsingleArray[] = {"freq","line","label","calc","precision","abund"}; + string collectsingleArray[] = {"freq","line","label","calc","abund"}; commandParameters["collect.single"] = addParameters(collectsingleArray, sizeof(collectsingleArray)/sizeof(string)); string collectsharedArray[] = {"jumble","freq","line","label","calc","groups"}; @@ -262,7 +262,7 @@ void ValidParameters::initCommandParameters() { string rarefactionsingleArray[] = {"iters","freq","line","label","calc","abund"}; commandParameters["rarefaction.single"] = addParameters(rarefactionsingleArray, sizeof(rarefactionsingleArray)/sizeof(string)); - string rarefactionsharedArray[] = {"iters","jumble","line","label","calc"}; + string rarefactionsharedArray[] = {"iters","jumble","line","label","calc","groups"}; commandParameters["rarefaction.shared"] = addParameters(rarefactionsharedArray, sizeof(rarefactionsharedArray)/sizeof(string)); string libshuffArray[] = {"iters","groups","step","form","cutoff"}; @@ -271,19 +271,19 @@ void ValidParameters::initCommandParameters() { string summarysingleArray[] = {"line","label","calc","abund"}; commandParameters["summary.single"] = addParameters(summarysingleArray, sizeof(summarysingleArray)/sizeof(string)); - string summarysharedArray[] = {"jumble","line","label","calc"}; + string summarysharedArray[] = {"jumble","line","label","calc","groups"}; commandParameters["summary.shared"] = addParameters(summarysharedArray, sizeof(summarysharedArray)/sizeof(string)); - string parsimonyArray[] = {"random","group","iters"}; + string parsimonyArray[] = {"random","groups","iters"}; commandParameters["parsimony"] = addParameters(parsimonyArray, sizeof(parsimonyArray)/sizeof(string)); - string unifracWeightedArray[] = {"group","iters"}; + string unifracWeightedArray[] = {"groups","iters"}; commandParameters["unifrac.weighted"] = addParameters(unifracWeightedArray, sizeof(unifracWeightedArray)/sizeof(string)); - string unifracUnweightedArray[] = {"group","iters"}; + string unifracUnweightedArray[] = {"groups","iters"}; commandParameters["unifrac.unweighted"] = addParameters(unifracUnweightedArray, sizeof(unifracUnweightedArray)/sizeof(string)); - string heatmapArray[] = {"group","line","label","sorted"}; + string heatmapArray[] = {"groups","line","label","sorted"}; commandParameters["heatmap"] = addParameters(heatmapArray, sizeof(heatmapArray)/sizeof(string)); string vennArray[] = {"groups","line","label","calc"}; @@ -339,8 +339,8 @@ void ValidParameters::initParameterRanges() { string freqArray[] = {">","1", "<","NA", "between"}; parameterRanges["freq"] = addParameters(freqArray, rangeSize); - string lineArray[] = {">=","1", "<","NA", "between"}; - parameterRanges["line"] = addParameters(lineArray, rangeSize); + //string lineArray[] = {">=","1", "<","NA", "between"}; + //parameterRanges["line"] = addParameters(lineArray, rangeSize); string abundArray[] = {">=","5", "<","NA", "between"}; parameterRanges["abund"] = addParameters(abundArray, rangeSize); -- 2.39.2