From: westcott Date: Mon, 4 May 2009 13:35:25 +0000 (+0000) Subject: added concensus command and updated calcs X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=50ed3b6104d5821d6184f882e1e1423d47dcbf10 added concensus command and updated calcs --- diff --git a/ace.h b/ace.h index c3d156c..26d93b1 100644 --- a/ace.h +++ b/ace.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class Ace : public Calculator { public: - Ace(int n) : abund(n), Calculator("ace", 3) {}; + Ace(int n) : abund(n), Calculator("ace", 3, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: int abund; }; diff --git a/bergerparker.h b/bergerparker.h index a0fd2aa..7b1dac9 100644 --- a/bergerparker.h +++ b/bergerparker.h @@ -19,9 +19,9 @@ It is a child of the calculator class.*/ class BergerParker : public Calculator { public: - BergerParker() : Calculator("bergerparker", 1) {}; + BergerParker() : Calculator("bergerparker", 1, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: }; diff --git a/bootstrap.h b/bootstrap.h index 7c72ca2..e54d985 100644 --- a/bootstrap.h +++ b/bootstrap.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class Bootstrap : public Calculator { public: - Bootstrap() : Calculator("Bootstrap", 1) {}; + Bootstrap() : Calculator("Bootstrap", 1, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; }; diff --git a/bootstrapsharedcommand.cpp b/bootstrapsharedcommand.cpp index 9051dd8..e88972c 100644 --- a/bootstrapsharedcommand.cpp +++ b/bootstrapsharedcommand.cpp @@ -91,6 +91,7 @@ int BootSharedCommand::execute(){ try { int count = 1; EstOutput data; + vector subset; //if the users entered no valid calculators don't execute command if (treeCalculators.size() == 0) { return 0; } @@ -163,15 +164,19 @@ int BootSharedCommand::execute(){ for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare for (int l = k; l < lookup.size(); l++) { if (k != l) { //we dont need to similiarity of a groups to itself + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(lookup[k]); subset.push_back(lookup[l]); + //get estimated similarity between 2 groups - data = treeCalculators[i]->getValues(lookup[k], lookup[l]); //saves the calculator outputs + data = treeCalculators[i]->getValues(subset); //saves the calculator outputs //save values in similarity matrix simMatrix[k][l] = data[0]; simMatrix[l][k] = data[0]; } } } - + //creates tree from similarity matrix and write out file createTree(out[i]); } @@ -219,8 +224,8 @@ void BootSharedCommand::createTree(ostream* out){ //do merges and create tree structure by setting parents and children //there are numGroups - 1 merges to do for (int i = 0; i < (numGroups - 1); i++) { - - float largest = 0.0; + + float largest = -1.0; int row, column; //find largest value in sims matrix by searching lower triangle for (int j = 1; j < simMatrix.size(); j++) { @@ -232,7 +237,7 @@ void BootSharedCommand::createTree(ostream* out){ //set non-leaf node info and update leaves to know their parents //non-leaf t->tree[numGroups + i].setChildren(index[row], index[column]); - + //parents t->tree[index[row]].setParent(numGroups + i); t->tree[index[column]].setParent(numGroups + i); @@ -243,36 +248,36 @@ void BootSharedCommand::createTree(ostream* out){ //branchlengths t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves()); t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves()); - + //set your length to leaves to your childs length plus branchlength t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength()); - + //update index index[row] = numGroups+i; index[column] = numGroups+i; //zero out highest value that caused the merge. - simMatrix[row][column] = 0.0; - simMatrix[column][row] = 0.0; - + simMatrix[row][column] = -1.0; + simMatrix[column][row] = -1.0; + //merge values in simsMatrix for (int n = 0; n < simMatrix.size(); n++) { //row becomes merge of 2 groups simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2; simMatrix[n][row] = simMatrix[row][n]; //delete column - simMatrix[column][n] = 0.0; - simMatrix[n][column] = 0.0; + simMatrix[column][n] = -1.0; + simMatrix[n][column] = -1.0; } } - + //assemble tree t->assembleTree(); - + //print newick file t->print(*out); - + //delete tree delete t; diff --git a/bstick.h b/bstick.h index f4693b7..476b588 100644 --- a/bstick.h +++ b/bstick.h @@ -18,9 +18,9 @@ It is a child of the calculator class.*/ class BStick : public Calculator { public: - BStick() : Calculator("bstick", 3) {}; + BStick() : Calculator("bstick", 3, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: double invSum(int, double); diff --git a/calculator.h b/calculator.h index 4cc3280..127c835 100644 --- a/calculator.h +++ b/calculator.h @@ -23,17 +23,19 @@ class Calculator { public: Calculator(){}; - Calculator(string n, int c) : name(n), cols(c) {}; + Calculator(string n, int c, bool m) : name(n), cols(c), multiple(m) {}; virtual EstOutput getValues(SAbundVector*) = 0; - virtual EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) = 0; + virtual EstOutput getValues(vector) = 0; virtual void print(ostream& f) { f.setf(ios::fixed, ios::floatfield); f.setf(ios::showpoint); f << data[0]; for(int i=1;i) {return data;}; }; diff --git a/collect.cpp b/collect.cpp index 8043dc1..d109ca0 100644 --- a/collect.cpp +++ b/collect.cpp @@ -63,6 +63,7 @@ void Collect::getSharedCurve(int increment = 1){ try { globaldata = GlobalData::getInstance(); vector lookup; + vector subset; //create and initialize vector of sharedvectors, one for each group for (int i = 0; i < globaldata->Groups.size(); i++) { @@ -78,14 +79,18 @@ try { //initialize labels for output //makes 'uniqueAB uniqueAC uniqueBC' if your groups are A, B, C getGroupComb(); - groupLabel = ""; + groupLabel = ""; for (int s = 0; s < groupComb.size(); s++) { groupLabel = groupLabel + label + groupComb[s] + "\t"; } - + + //for multi displays + string groupLabelAll = groupLabel + label + "all\t"; + for(int i=0;iregisterDisplay(displays[i]); //adds a display[i] to cdd - displays[i]->init(groupLabel); + if (displays[i]->isCalcMultiple() == true) { displays[i]->init(groupLabelAll); } + else { displays[i]->init(groupLabel); } } //sample all the members @@ -106,16 +111,19 @@ try { //calculate at 0 and the given increment if((i == 0) || (i+1) % increment == 0){ - //randomize group order - if (globaldata->getJumble() == "1") { random_shuffle(lookup.begin(), lookup.end()); } - //how many comparisons to make i.e. for group a, b, c = ab, ac, bc. + //how many comparisons to make i.e. for group a, b, c = ab, ac, bc. int n = 1; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { - ccd->updateSharedData(lookup[k], lookup[l], i+1, globaldata->Groups.size()); + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabund vectors + subset.push_back(lookup[k]); subset.push_back(lookup[l]); + ccd->updateSharedData(subset, i+1, globaldata->Groups.size()); } n++; } + //if this is a calculator that can do multiples then do them + ccd->updateSharedData(lookup, i+1, globaldata->Groups.size()); } totalNumSeq = i+1; } @@ -126,10 +134,15 @@ try { int n = 1; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { - ccd->updateSharedData(lookup[k], lookup[l], totalNumSeq, globaldata->Groups.size()); + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabund vectors + subset.push_back(lookup[k]); subset.push_back(lookup[l]); + ccd->updateSharedData(subset, totalNumSeq, globaldata->Groups.size()); } n++; } + //if this is a calculator that can do multiples then do them + ccd->updateSharedData(lookup, totalNumSeq, globaldata->Groups.size()); } //resets output files diff --git a/collectdisplay.h b/collectdisplay.h index af97c75..7e40ad7 100644 --- a/collectdisplay.h +++ b/collectdisplay.h @@ -23,15 +23,15 @@ public: output->output(nSeqs, data); }; - void update(SharedRAbundVector* shared1, SharedRAbundVector* shared2, int numSeqs, int numGroups){ + void update(vector shared, int numSeqs, int numGroups){ timesCalled++; - data = estimate->getValues(shared1, shared2); //passes estimators a shared vector from each group to be compared + data = estimate->getValues(shared); //passes estimators a shared vector from each group to be compared //figure out what groups are being compared in getValues //because the jumble parameter randomizes the order we need to put the results in the correct column in the output file int group1Index, group2Index, pos; - group1Index = shared1->getGroupIndex(); - group2Index = shared2->getGroupIndex(); + group1Index = shared[0]->getGroupIndex(); + group2Index = shared[1]->getGroupIndex(); numGroupComb = 0; int n = 1; @@ -46,12 +46,25 @@ public: } n++; } - - groupData.resize((numGroupComb*data.size()), 0); - //fills groupdata with datas info - for (int i = 0; i < data.size(); i++) { - groupData[pos+i] = data[i]; + if (estimate->getMultiple() == true) { + numGroupComb++; + groupData.resize((numGroupComb*data.size()), 0); + //is this the time its called with all values + if ((timesCalled % numGroupComb) == 0) { + //last spot + pos = ((groupData.size()-1) * data.size()); + } + //fills groupdata with datas info + for (int i = 0; i < data.size(); i++) { + groupData[pos+i] = data[i]; + } + }else { + groupData.resize((numGroupComb*data.size()), 0); + //fills groupdata with datas info + for (int i = 0; i < data.size(); i++) { + groupData[pos+i] = data[i]; + } } //when you get all your groups info then output @@ -63,6 +76,7 @@ public: void init(string s) { output->initFile(s); }; void reset() { output->resetFile(); }; void close() { output->resetFile(); }; + bool isCalcMultiple() { return estimate->getMultiple(); } private: Calculator* estimate; diff --git a/collectorscurvedata.h b/collectorscurvedata.h index af81c39..480bc36 100644 --- a/collectorscurvedata.h +++ b/collectorscurvedata.h @@ -40,23 +40,25 @@ private: class SharedCollectorsCurveData : public Observable { public: - SharedCollectorsCurveData() : shared1(0), shared2(0) {}; + SharedCollectorsCurveData() { }; //: shared1(0), shared2(0) void registerDisplay(Display* o) { displays.insert(o); }; void removeDisplay(Display* o) { displays.erase(o); delete o; }; - void SharedDataChanged() { notifyDisplays(); }; - void updateSharedData(SharedRAbundVector* rv, SharedRAbundVector* rv2, int numSeqs, int numGroupComb) { shared1 = rv; shared2 = rv2; NumSeqs = numSeqs; NumGroupComb = numGroupComb; SharedDataChanged(); }; - + void SharedDataChanged() { notifyDisplays(); }; + void updateSharedData(vector s, int numSeqs, int numGroupComb) { shared = s; NumSeqs = numSeqs; NumGroupComb = numGroupComb; SharedDataChanged(); }; + void notifyDisplays(){ for(set::iterator pos=displays.begin();pos!=displays.end();pos++){ - (*pos)->update(shared1, shared2, NumSeqs, NumGroupComb); + if ( ((*pos)->isCalcMultiple() == true) || ( ((*pos)->isCalcMultiple() == false) && (shared.size() == 2) ) ) { + (*pos)->update(shared, NumSeqs, NumGroupComb); + } } }; private: set displays; - SharedRAbundVector* shared1; - SharedRAbundVector* shared2; + vector multiDisplays; + vector shared; int NumSeqs, NumGroupComb; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index ff15613..1f6f828 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -37,6 +37,7 @@ #include "getoturepcommand.h" #include "treegroupscommand.h" #include "bootstrapsharedcommand.h" +#include "concensuscommand.h" /***********************************************************/ @@ -86,6 +87,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "get.oturep") { command = new GetOTURepCommand(); } else if(commandName == "tree.shared") { command = new TreeGroupCommand(); } else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(); } + else if(commandName == "concensus") { command = new ConcensusCommand(); } else { command = new NoCommand(); } return command; diff --git a/coverage.h b/coverage.h index c17f8e7..7b89c1e 100644 --- a/coverage.h +++ b/coverage.h @@ -20,9 +20,9 @@ class Coverage : public Calculator { public: - Coverage() : Calculator("coverage", 1) {}; + Coverage() : Calculator("coverage", 1, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; }; diff --git a/display.h b/display.h index 17867c8..c4301ba 100644 --- a/display.h +++ b/display.h @@ -15,10 +15,11 @@ class Display { public: virtual void update(SAbundVector* rank) = 0; - virtual void update(SharedRAbundVector* shared1, SharedRAbundVector* shared2, int numSeqs, int numGroupComb) = 0; + virtual void update(vector shared, int numSeqs, int numGroupComb) = 0; virtual void init(string) = 0; virtual void reset() = 0; virtual void close() = 0; + virtual bool isCalcMultiple() = 0; private: diff --git a/errorchecking.cpp b/errorchecking.cpp index aa223a2..1693354 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -196,9 +196,9 @@ bool ErrorCheck::checkInput(string input) { } } - if ((commandName == "unifrac.weighted") || (commandName == "unifrac.unweighted")) { + if ((commandName == "unifrac.weighted") || (commandName == "unifrac.unweighted") || (commandName == "concensus")) { if (globaldata->gTree.size() == 0) {//no trees were read - cout << "You must execute the read.tree command, before you may execute the unifrac.weighted or unifrac.unweighted command." << endl; return false; } + cout << "You must execute the read.tree command, before you may execute the unifrac.weighted, unifrac.unweighted or concensus command." << endl; return false; } } //check for valid method diff --git a/fileoutput.cpp b/fileoutput.cpp index 9c1f9c0..74b5335 100644 --- a/fileoutput.cpp +++ b/fileoutput.cpp @@ -90,7 +90,7 @@ void ThreeColumnFile::resetFile(){ renameOk = rename(outName.c_str(), inName.c_str()); //checks to make sure user was able to rename and remove successfully - if ((renameOk != 0)) { cout << "Unable to rename necessary files." << endl; cout << outName << " g " << inName << endl;} + if ((renameOk != 0)) { cout << "Unable to rename necessary files." << endl; } } catch(exception& e) { diff --git a/fileoutput.h b/fileoutput.h index 3b73b14..d7c4726 100644 --- a/fileoutput.h +++ b/fileoutput.h @@ -32,7 +32,7 @@ protected: class ThreeColumnFile : public FileOutput { public: - ThreeColumnFile(string n) : FileOutput(), inName(n), counter(0), outName(getPathName(n) + ".temp." + getSimpleName(n)) { }; + ThreeColumnFile(string n) : FileOutput(), inName(n), counter(0), outName(getPathName(n) + ".temp") { }; ~ThreeColumnFile(); void initFile(string); void output(int, vector); @@ -56,7 +56,7 @@ class OneColumnFile : public FileOutput { public: - OneColumnFile(string n) : inName(n), counter(0), outName(getPathName(n) + ".temp." + getSimpleName(n)) {}; + OneColumnFile(string n) : inName(n), counter(0), outName(getPathName(n) + ".temp") {}; ~OneColumnFile(); void output(int, vector); void initFile(string); @@ -80,7 +80,7 @@ class SharedOneColumnFile : public FileOutput { public: - SharedOneColumnFile(string n) : inName(n), counter(0), outName(getPathName(n) + ".temp." + getSimpleName(n)) {}; + SharedOneColumnFile(string n) : inName(n), counter(0), outName(getPathName(n) + ".temp") {}; ~SharedOneColumnFile(); void output(int, vector); void initFile(string); @@ -105,7 +105,7 @@ private: class SharedThreeColumnFile : public FileOutput { public: - SharedThreeColumnFile(string n, string groups) : FileOutput(), groupLabel(groups), inName(n), counter(0), numGroup(1), outName(getPathName(n) + ".temp." + getSimpleName(n)) { }; + SharedThreeColumnFile(string n, string groups) : FileOutput(), groupLabel(groups), inName(n), counter(0), numGroup(1), outName(getPathName(n) + ".temp") { }; ~SharedThreeColumnFile(); void initFile(string); void output(int, vector); @@ -125,11 +125,11 @@ private: }; /***********************************************************************/ - +//used by parsimony, unifrac.weighted and unifrac.unweighted class ColumnFile : public FileOutput { public: - ColumnFile(string n) : FileOutput(), inName(n), counter(0), outName(getPathName(n) + ".temp." + getSimpleName(n)) { globaldata = GlobalData::getInstance(); }; + ColumnFile(string n) : FileOutput(), inName(n), counter(0), outName(getPathName(n) + ".temp") { globaldata = GlobalData::getInstance(); }; ~ColumnFile(); //to make compatible with parent class diff --git a/geom.h b/geom.h index f88cb4a..be60de3 100644 --- a/geom.h +++ b/geom.h @@ -19,9 +19,11 @@ It is a child of the calculator class. */ class Geom : public Calculator { public: - Geom() : Calculator("geometric", 3) {}; + + Geom() : Calculator("geometric", 3, false) {}; + EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: double kEq(double, double); diff --git a/helpcommand.cpp b/helpcommand.cpp index ac01330..8060d5f 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -83,11 +83,11 @@ int HelpCommand::execute(){ cout << "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq)." << "\n" << "\n"; }else if (globaldata->helpRequest == "collect.shared") { cout << "The collect.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The collect.shared command parameters are label, line, freq, jumble, calc and groups. No parameters are required, but you may not use " << "\n"; + cout << "The collect.shared command parameters are label, line, freq, calc and groups. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The collect.shared command should be in the following format: " << "\n"; - cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n"; - cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, jumble=1, groups=B-C, calc=sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; - cout << "The default values for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble), freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan." << "\n"; + cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; + cout << "The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan." << "\n"; cout << "The default value for groups is all the groups in your groupfile." << "\n"; validCalcs->printCalc("shared", cout); cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; @@ -147,12 +147,12 @@ int HelpCommand::execute(){ cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n"; }else if (globaldata->helpRequest == "summary.shared") { cout << "The summary.shared command can only be executed after a successful read.otu command." << "\n"; - cout << "The summary.shared command parameters are label, line, jumble and calc. No parameters are required, but you may not use " << "\n"; + cout << "The summary.shared command parameters are label, line and calc. No parameters are required, but you may not use " << "\n"; cout << "both the line and label parameters at the same time. The summary.shared command should be in the following format: " << "\n"; - cout << "summary.shared(label=yourLabel, line=yourLines, jumble=yourJumble, calc=yourEstimators, groups=yourGroups)." << "\n"; - cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, jumble=1, groups=B-C, calc=sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; + cout << "summary.shared(label=yourLabel, line=yourLines, calc=yourEstimators, groups=yourGroups)." << "\n"; + cout << "Example summary.shared(label=unique-.01-.03, line=0,5,10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n"; validCalcs->printCalc("sharedsummary", cout); - cout << "The default value for jumble is 1 (meaning jumble, if it’s set to 0 then it will not jumble) and calc is sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan" << "\n"; + cout << "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan" << "\n"; cout << "The default value for groups is all the groups in your groupfile." << "\n"; cout << "The label and line parameters are used to analyze specific lines in your input." << "\n"; cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n"; @@ -250,6 +250,15 @@ int HelpCommand::execute(){ validCalcs->printCalc("boot", cout); cout << "The bootstrap.shared command outputs a .tre file for each calculator you specify at each distance you choose containing iters number of trees." << "\n"; cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + }else if (globaldata->helpRequest == "concensus") { + cout << "The concensus command can only be executed after a successful read.tree command." << "\n"; + cout << "The concensus command has no parameters." << "\n"; + cout << "The concensus command should be in the following format: concensus()." << "\n"; + cout << "The concensus command output two files: .concensus.tre and .concensuspairs." << "\n"; + cout << "The .concensus.tre file contains the concensus tree of the trees in your input file." << "\n"; + cout << "The branch lengths are the percentage of trees in your input file that had the given pair." << "\n"; + cout << "The .concensuspairs file contains a list of the internal nodes in your tree. For each node, the pair that was used in the concensus tree " << "\n"; + cout << "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages." << "\n" << "\n"; }else if (globaldata->helpRequest == "bin.seqs") { cout << "The bin.seqs command can only be executed after a successful read.otu command of a list file." << "\n"; cout << "The bin.seqs command parameters are fasta, name, line and label. The fasta parameter is required, and you may not use line and label at the same time." << "\n"; diff --git a/jackknife.h b/jackknife.h index ec920e0..70f1dca 100644 --- a/jackknife.h +++ b/jackknife.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class Jackknife : public Calculator { public: - Jackknife() : Calculator("Jackknife", 3) { getAMatrix(); }; + Jackknife() : Calculator("Jackknife", 3, false) { getAMatrix(); }; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: static const int maxOrder = 30; diff --git a/logsd.h b/logsd.h index 7bb12c3..70d584d 100644 --- a/logsd.h +++ b/logsd.h @@ -19,9 +19,10 @@ It is a child of the calculator class.*/ class LogSD : public Calculator { public: - LogSD() : Calculator("logseries", 3) {}; + + LogSD() : Calculator("logseries", 3, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) { return data; }; + EstOutput getValues(vector) {return data;}; private: double logS(double); diff --git a/mothur.h b/mothur.h index 36d7b28..8ad1645 100644 --- a/mothur.h +++ b/mothur.h @@ -42,7 +42,15 @@ using namespace std; typedef unsigned long long ull; - +struct IntNode { + int lvalue; + int rvalue; + int lcoef; + int rcoef; + IntNode* left; + IntNode* right; +}; + /***********************************************************************/ // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 @@ -95,7 +103,6 @@ string toHex(const T&x){ return output.str(); } - //********************************************************************************************************************** template @@ -203,6 +210,17 @@ inline string getSimpleName(string longName){ return simpleName; } +/***********************************************************************/ + +inline int factorial(int num){ + int total = 1; + + for (int i = 1; i <= num; i++) { + total *= i; + } + + return total; +} /***********************************************************************/ diff --git a/npshannon.h b/npshannon.h index 9272409..dc137ed 100644 --- a/npshannon.h +++ b/npshannon.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class NPShannon : public Calculator { public: - NPShannon() : Calculator("NPShannon", 1) {}; + NPShannon() : Calculator("NPShannon", 1, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: }; diff --git a/nseqs.h b/nseqs.h index 0a7b0c3..1382add 100644 --- a/nseqs.h +++ b/nseqs.h @@ -19,13 +19,13 @@ class NSeqs : public Calculator { public: - NSeqs() : Calculator("NSeqs", 1) {}; + NSeqs() : Calculator("NSeqs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ data.resize(1,0); data[0] = (double)rank->getNumSeqs(); return data; } - EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) {return data;}; + EstOutput getValues(vector) {return data;}; }; /***********************************************************************/ diff --git a/qstat.h b/qstat.h index b0c985a..5bc6d9e 100644 --- a/qstat.h +++ b/qstat.h @@ -18,9 +18,10 @@ It is a child of the calculator class.*/ class QStat : public Calculator { public: - QStat() : Calculator("qstat", 1) {}; + QStat() : Calculator("qstat", 1, false) {}; + EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: RAbundVector rdata; diff --git a/raredisplay.cpp b/raredisplay.cpp index 8126266..6843ebe 100644 --- a/raredisplay.cpp +++ b/raredisplay.cpp @@ -68,9 +68,9 @@ void RareDisplay::update(SAbundVector* rank){ }; /***********************************************************************/ -void RareDisplay::update(SharedRAbundVector* shared1, SharedRAbundVector* shared2, int numSeqs, int numGroupComb) { +void RareDisplay::update(vector shared, int numSeqs, int numGroupComb) { try { - vector data = estimate->getValues(shared1, shared2); + vector data = estimate->getValues(shared); double newNSeqs = data[0]; if(nIters != 1){ diff --git a/raredisplay.h b/raredisplay.h index ff7de73..b7711d2 100644 --- a/raredisplay.h +++ b/raredisplay.h @@ -15,13 +15,14 @@ class RareDisplay : public Display { public: RareDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file), nIters(1), - tempInName(getPathName(output->getFileName()) + ".tempin."+ getSimpleName(output->getFileName())), tempOutName(getPathName(output->getFileName()) + ".tempout."+ getSimpleName(output->getFileName())) {}; + tempInName(getPathName(output->getFileName()) + ".tempin"), tempOutName(getPathName(output->getFileName()) + ".tempout") {}; ~RareDisplay() { delete estimate; delete output; }; void init(string); void reset(); void update(SAbundVector*); - void update(SharedRAbundVector* shared1, SharedRAbundVector* shared2, int numSeqs, int numGroupComb); + void update(vector shared, int numSeqs, int numGroupComb); void close(); + bool isCalcMultiple() { return estimate->getMultiple(); } private: Calculator* estimate; diff --git a/rarefact.cpp b/rarefact.cpp index 2f5f506..d7df01e 100644 --- a/rarefact.cpp +++ b/rarefact.cpp @@ -114,10 +114,15 @@ try { //randomize the groups random_shuffle(lookup.begin(), lookup.end()); - + + vector subset; //send each group one at a time for (int k = 0; k < lookup.size(); k++) { - rcd->updateSharedData(lookup[0], lookup[k], k+1, numGroupComb); + subset.clear(); //clears out old pair of sharedrabunds + //add in new pair of sharedrabunds + subset.push_back(lookup[0]); subset.push_back(lookup[k]); + + rcd->updateSharedData(subset, k+1, numGroupComb); mergeVectors(lookup[0], lookup[k]); } diff --git a/rarefactioncurvedata.h b/rarefactioncurvedata.h index 9d97b80..fad613b 100644 --- a/rarefactioncurvedata.h +++ b/rarefactioncurvedata.h @@ -38,23 +38,22 @@ private: class SharedRarefactionCurveData : public Observable { public: - SharedRarefactionCurveData() : shared1(0), shared2(0) {}; + SharedRarefactionCurveData() {}; //: shared1(0), shared2(0) void registerDisplay(Display* o) { displays.insert(o); }; void removeDisplay(Display* o) { displays.erase(o); delete o; }; void SharedDataChanged() { notifyDisplays(); }; - void updateSharedData(SharedRAbundVector* rv, SharedRAbundVector* rv2, int numSeqs, int numGroupComb) { shared1 = rv; shared2 = rv2; NumSeqs = numSeqs; NumGroupComb = numGroupComb; SharedDataChanged(); }; + void updateSharedData(vector r, int numSeqs, int numGroupComb) { shared = r; NumSeqs = numSeqs; NumGroupComb = numGroupComb; SharedDataChanged(); }; void notifyDisplays(){ for(set::iterator pos=displays.begin();pos!=displays.end();pos++){ - (*pos)->update(shared1, shared2, NumSeqs, NumGroupComb); + (*pos)->update(shared, NumSeqs, NumGroupComb); } }; private: set displays; - SharedRAbundVector* shared1; - SharedRAbundVector* shared2; + vector shared; int NumSeqs, NumGroupComb; }; diff --git a/shannon.h b/shannon.h index e594c09..1930239 100644 --- a/shannon.h +++ b/shannon.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class Shannon : public Calculator { public: - Shannon() : Calculator("Shannon", 3) {}; + Shannon() : Calculator("Shannon", 3, false) {}; EstOutput getValues(SAbundVector* rank); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; private: }; diff --git a/sharedace.cpp b/sharedace.cpp index 8730596..34ad28b 100644 --- a/sharedace.cpp +++ b/sharedace.cpp @@ -12,16 +12,16 @@ /***********************************************************************/ -EstOutput SharedAce::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput SharedAce::getValues(vector shared) { try { data.resize(1,0); string label; - label = shared1->getLabel(); + label = shared[0]->getLabel(); int fARare, fBRare, S12Rare, S12Abund, S12, f11, tempA, tempB, t10, t01, t11, t21, t12, t22, C12Numerator; fARare = 0; fBRare = 0; S12Rare = 0; S12Abund = 0; S12 = 0; f11 = 0; t10 = 0; t01 = 0; t11= 0; t21= 0; t12= 0; t22= 0; C12Numerator = 0; - float SharedAce, C12, part1, part2, part3, part4, part5, Gamma1, Gamma2, Gamma3; + float Sharedace, C12, part1, part2, part3, part4, part5, Gamma1, Gamma2, Gamma3; /*fARare = number of OTUs with one individual found in A and less than or equal to 10 in B. fBRare = number of OTUs with one individual found in B and less than or equal to 10 in A. @@ -32,10 +32,10 @@ EstOutput SharedAce::getValues(SharedRAbundVector* shared1, SharedRAbundVector* S12 = number of shared OTUs in A and B This estimator was changed to reflect Caldwell's changes, eliminating the nrare / nrare - 1 */ - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); if ((tempA != 0) && (tempB != 0)) {//they are shared S12++; //do both A and B have one @@ -94,8 +94,8 @@ EstOutput SharedAce::getValues(SharedRAbundVector* shared1, SharedRAbundVector* part4 = fBRare * Gamma2; part5 = f11 * Gamma3; - SharedAce = S12Abund + part1 + (part2 * (part3 + part4 + part5)); - data[0] = SharedAce; + Sharedace = S12Abund + part1 + (part2 * (part3 + part4 + part5)); + data[0] = Sharedace; return data; } diff --git a/sharedace.h b/sharedace.h index f30e196..d8f69e1 100644 --- a/sharedace.h +++ b/sharedace.h @@ -19,9 +19,9 @@ It is a child of the calculator class. */ class SharedAce : public Calculator { public: - SharedAce(int n=10) : abund(n), Calculator("sharedace", 3) {}; + SharedAce(int n=10) : abund(n), Calculator("sharedace", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: int abund; }; diff --git a/sharedanderbergs.cpp b/sharedanderbergs.cpp index 08ca8fd..0a113ab 100644 --- a/sharedanderbergs.cpp +++ b/sharedanderbergs.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput Anderberg::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput Anderberg::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; @@ -21,10 +21,10 @@ EstOutput Anderberg::getValues(SharedRAbundVector* shared1, SharedRAbundVector* data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); if (tempA != 0) { S1++; } if (tempB != 0) { S2++; } diff --git a/sharedanderbergs.h b/sharedanderbergs.h index 1c8d9ce..f17e5fa 100644 --- a/sharedanderbergs.h +++ b/sharedanderbergs.h @@ -16,9 +16,9 @@ class Anderberg : public Calculator { public: - Anderberg() : Calculator("Anderberg", 1) {}; + Anderberg() : Calculator("Anderberg", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedbraycurtis.cpp b/sharedbraycurtis.cpp index d1b401e..9a63cdf 100644 --- a/sharedbraycurtis.cpp +++ b/sharedbraycurtis.cpp @@ -10,8 +10,8 @@ #include "sharedbraycurtis.h" /***********************************************************************/ - -EstOutput BrayCurtis::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +//This is used by SharedJAbund and SharedSorAbund +EstOutput BrayCurtis::getValues(vector shared) { try { data.resize(1,0); @@ -24,10 +24,10 @@ EstOutput BrayCurtis::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sumSharedAB = the sum of the minimum otus int all shared otus in AB. */ - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); sumSharedA += tempA; sumSharedB += tempB; diff --git a/sharedbraycurtis.h b/sharedbraycurtis.h index bbd09f5..446b3eb 100644 --- a/sharedbraycurtis.h +++ b/sharedbraycurtis.h @@ -15,9 +15,9 @@ class BrayCurtis : public Calculator { public: - BrayCurtis() : Calculator("BrayCurtis", 1) {}; + BrayCurtis() : Calculator("BrayCurtis", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedchao1.cpp b/sharedchao1.cpp index 02e2411..0df0eab 100644 --- a/sharedchao1.cpp +++ b/sharedchao1.cpp @@ -10,53 +10,76 @@ #include "sharedchao1.h" /***********************************************************************/ -EstOutput SharedChao1::getValues(SharedRAbundVector* sharedA, SharedRAbundVector* sharedB){ +EstOutput SharedChao1::getValues(vector shared){ try { data.resize(1,0); - int f11, f1A, f2A, f1B, f2B, S12, tempA, tempB; - f11 = 0; f1A = 0; f2A = 0; f1B = 0; f2B = 0; S12 = 0; - float ChaoAB, part1, part2, part3; - - /* f11 = number of shared OTUs with one observed individual in A and B - f1A, f2A = number of shared OTUs with one or two individuals observed in A - f1B, f2B = number of shared OTUs with one or two individuals observed in B - S12 = number of shared OTUs in A and B */ - - //loop through vectors calculating the f11, f1A, f2A, f1B, f2B, S12 values - for (int i = 0; i< sharedA->size(); i++) { - tempA = sharedA->getAbundance(i); //store in temps to avoid calling getAbundance multiple times - tempB = sharedB->getAbundance(i); - if ((tempA != 0) && (tempB != 0)) {//they are shared - S12++; //they are shared - //do both A and B have one - if ((tempA == 1) && (tempB == 1)) { f11++; } - - //does A have one or two - if (tempA == 1) { f1A++; } - else if (tempA == 2) { f2A++; } + vector temp; + int numGroups = shared.size(); + float Chao = 0.0; float leftvalue, rightvalue; - //does B have one or two - if (tempB == 1) { f1B++; } - else if (tempB == 2) { f2B++; } - + // IntNode is defined in mothur.h + // The tree used here is a binary tree used to represent the f1+++, f+1++, f++1+, f+++1, f11++, f1+1+... + // combinations required to solve the chao estimator equation for any number of groups. Conceptually, think + // of each node as having a 1 and a + value, or for f2 values a 2 and a + value, and 2 pointers to intnodes, and 2 coeffient values. + // The coeffient value is how many times you chose branch 1 to get to that fvalue. + // If you choose left you are selecting the 1 or 2 value and right means the + value. For instance, to find + // the number of bins that have f1+1+ you would start at the root, go left, right, left, and select the rightvalue. + // the coeffient is 2. Note: we only set the coeffient in f2 values. + + //create and initialize trees to 0. + initialTree(numGroups); + + //loop through vectors calculating the f11, f1A, f2A, f1B, f2B, S12 values + for (int i = 0; i < shared[0]->size(); i++) { + //get bin values and calc shared + bool sharedByAll = true; + temp.clear(); + for (int j = 0; j < numGroups; j++) { + temp.push_back(shared[j]->getAbundance(i)); + if (temp[j] == 0) { sharedByAll = false; } + } + + //they are shared + if (sharedByAll == true) { + + // cout << "temp = "; + // for (int h = 0; h < temp.size(); h++) { cout << temp[h] << " "; } cout << endl; + //find f1 and f2values + updateTree(temp); } } - - //checks for divide by zero error - if ((f2A == 0) || (f2B == 0)) { - part1 = ((float)(f1A*f1B)/(float)(4*(f2A+1)*(f2B+1))); - part2 = ((float)(f1A*(f1A-1))/(float)(2*f2A+2)); - part3 = ((float)(f1B*(f1B-1))/(float)(2*f2B+2)); - }else { - part1 = ((float)(f1A*f1B)/(float)(4*f2A*f2B)); - part2 = ((float)(f1A*f1A)/(float)(2*f2A)); - part3 = ((float)(f1B*f1B)/(float)(2*f2B)); + + + //cout << "Entering " << endl; + //calculate chao1, (numleaves-1) because numleaves contains the ++ values. + for (int i = 0; i < numLeaves; i++) { + //divide by zero error + if (f2leaves[i]->lvalue == 0) { f2leaves[i]->lvalue++; } + if (f2leaves[i]->rvalue == 0) { f2leaves[i]->rvalue++; } + + //cout << "f2 leaves "; + //cout << f2leaves[i]->lvalue << f2leaves[i]->rvalue << endl; + + // cout << "f2 leaf coef "; + //cout << f2leaves[i]->lcoef << '\t' << f2leaves[i]->rcoef << endl; + + // cout << "f1 leaves "; + // cout << f1leaves[i]->lvalue << f1leaves[i]->rvalue << endl; + + + leftvalue = (float)(f1leaves[i]->lvalue * f1leaves[i]->lvalue) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * f2leaves[i]->lvalue); + if (i != (numLeaves-1)) { + rightvalue = (float)(f1leaves[i]->rvalue * f1leaves[i]->rvalue) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * f2leaves[i]->rvalue); + }else{ + rightvalue = (float)(f1leaves[i]->rvalue); + } + //cout << "left = " << leftvalue << " right = " << rightvalue << endl; + Chao += leftvalue + rightvalue; } - ChaoAB = (float)S12 + (float)(f11*part1) + part2 + part3; - data[0] = ChaoAB; - + // cout << "exiting " << endl; + data[0] = Chao; return data; } catch(exception& e) { @@ -67,7 +90,197 @@ EstOutput SharedChao1::getValues(SharedRAbundVector* sharedA, SharedRAbundVector cout << "An unknown error has occurred in the SharedChao1 class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } +} + +/***********************************************************************/ +//builds trees structure with n leaf nodes initialized to 0. +void SharedChao1::initialTree(int n) { + try { + // (2^n) / 2. Divide by 2 because each leaf node contains 2 values. One for + and one for 1 or 2. + numLeaves = pow(2, (float)n) / 2; + numNodes = 2*numLeaves - 1; + int countleft = 0; + int countright = 1; + + f1leaves.resize(numNodes); + f2leaves.resize(numNodes); + + //initialize leaf values + for (int i = 0; i < numLeaves; i++) { + f1leaves[i] = new IntNode; + f1leaves[i]->lvalue = 0; + f1leaves[i]->rvalue = 0; + f1leaves[i]->left = NULL; + f1leaves[i]->right = NULL; + + f2leaves[i] = new IntNode; + f2leaves[i]->lvalue = 0; + f2leaves[i]->rvalue = 0; + f2leaves[i]->left = NULL; + f2leaves[i]->right = NULL; + } + + //set pointers to children + for (int j = numLeaves; j < numNodes; j++) { + f1leaves[j] = new IntNode; + f1leaves[j]->left = f1leaves[countleft]; + f1leaves[j]->right = f1leaves[countright]; + + f2leaves[j] = new IntNode; + f2leaves[j]->left = f2leaves[countleft]; + f2leaves[j]->right =f2leaves[countright]; + + countleft = countleft + 2; + countright = countright + 2; + } + + //point to root + f1root = f1leaves[numNodes-1]; + + //point to root + f2root = f2leaves[numNodes-1]; + + //set coeffients + setCoef(f2root, 0); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function initialTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedChao1 class function initialTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************************/ +//take vector containing the abundance info. for a bin and updates trees. +void SharedChao1::updateTree(vector bin) { + try { + updateBranchf1(f1root, bin, 0); + updateBranchf2(f2root, bin, 0); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function updateTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedChao1 class function updateTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } } /***********************************************************************/ +void SharedChao1::updateBranchf1(IntNode* node, vector bin, int index) { + try { + //if you have more than one group + if (index == (bin.size()-1)) { + if (bin[index] == 1) { node->lvalue++; node->rvalue++; } + else { node->rvalue++; } + }else { + if (bin[index] == 1) { + //follow path as if you are 1 + updateBranchf1(node->left, bin, index+1); + } + //follow path as if you are + + updateBranchf1(node->right, bin, index+1); + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function updateBranchf1. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedChao1 class function updateBranchf1. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ +void SharedChao1::updateBranchf2(IntNode* node, vector bin, int index) { + try { + //if you have more than one group + if (index == (bin.size()-1)) { + if (bin[index] == 2) { node->lvalue++; node->rvalue++; } + else { node->rvalue++; } + }else { + if (bin[index] == 2) { + //follow path as if you are 1 + updateBranchf2(node->left, bin, index+1); + } + //follow path as if you are + + updateBranchf2(node->right, bin, index+1); + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function updateBranchf2. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedChao1 class function updateBranchf2. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ +void SharedChao1::setCoef(IntNode* node, int coef) { + try { + if (node->left != NULL) { + setCoef(node->left, coef+1); + setCoef(node->right, coef); + }else { + node->lcoef = coef+1; + node->rcoef = coef; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedChao1 class Function getCoef. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedChao1 class function getCoef. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ +//for debugging purposes +void SharedChao1::printTree() { + + cout << "F1 leaves" << endl; + printBranch(f1root); + + cout << "F2 leaves" << endl; + printBranch(f2root); + + +} +/*****************************************************************/ +void SharedChao1::printBranch(IntNode* node) { + try { + + // you are not a leaf + if (node->left != NULL) { + printBranch(node->left); + printBranch(node->right); + }else { //you are a leaf + cout << node->lvalue << endl; + cout << node->rvalue << endl; + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/*****************************************************************/ + + + + diff --git a/sharedchao1.h b/sharedchao1.h index 7cca63a..243039e 100644 --- a/sharedchao1.h +++ b/sharedchao1.h @@ -17,12 +17,30 @@ It is a child of the calculator class. */ /***********************************************************************/ + class SharedChao1 : public Calculator { -public: - SharedChao1() : Calculator("sharedchao", 3) {}; - EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + public: + SharedChao1() : Calculator("sharedchao", 3, true) {}; + EstOutput getValues(SAbundVector*) {return data;}; + EstOutput getValues(vector); + private: + IntNode* f1root; + IntNode* f2root; + vector f1leaves; + vector f2leaves; + int numLeaves; + int numNodes; + + void initialTree(int); //builds trees structure with n leaf nodes initialized to 0. + void setCoef(IntNode*, int); + void updateTree(vector); //take vector containing the abundance info. for a bin and updates trees. + void updateBranchf1(IntNode*, vector, int); //pointer, vector of abundance values, index into vector + void updateBranchf2(IntNode*, vector, int); //pointer, vector of abundance values, index into vector + + //for debugging + void printTree(); + void printBranch(IntNode*); }; /***********************************************************************/ diff --git a/sharedjabund.cpp b/sharedjabund.cpp index 79a5745..b969e24 100644 --- a/sharedjabund.cpp +++ b/sharedjabund.cpp @@ -11,13 +11,13 @@ /***********************************************************************/ -EstOutput JAbund::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput JAbund::getValues(vector shared) { try { EstOutput UVest; UVest.resize(2,0); data.resize(1,0); - UVest = uv->getUVest(shared1, shared2); + UVest = uv->getUVest(shared); //UVest[0] is Uest UVest[1] is Vest data[0] = (UVest[0] * UVest[1]) / ((float)(UVest[0] + UVest[1] - (UVest[0] * UVest[1]))); diff --git a/sharedjabund.h b/sharedjabund.h index 5e44243..f01d616 100644 --- a/sharedjabund.h +++ b/sharedjabund.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class JAbund : public Calculator { public: - JAbund() : Calculator("JAbund", 3) {}; + JAbund() : Calculator("JAbund", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: UVEst* uv; diff --git a/sharedjclass.cpp b/sharedjclass.cpp index 54d87cc..a0ccc89 100644 --- a/sharedjclass.cpp +++ b/sharedjclass.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput Jclass::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput Jclass::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; @@ -21,10 +21,10 @@ EstOutput Jclass::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sha data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); //find number of bins in shared1 and shared2 if (tempA != 0) { S1++; } diff --git a/sharedjclass.h b/sharedjclass.h index bafc3f7..45302ef 100644 --- a/sharedjclass.h +++ b/sharedjclass.h @@ -19,9 +19,9 @@ It is a child of the calculator class. */ class Jclass : public Calculator { public: - Jclass() : Calculator("Jclass", 3) {}; + Jclass() : Calculator("Jclass", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedjest.cpp b/sharedjest.cpp index 079cdef..3e37046 100644 --- a/sharedjest.cpp +++ b/sharedjest.cpp @@ -14,7 +14,7 @@ /***********************************************************************/ -EstOutput Jest::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput Jest::getValues(vector shared) { try { EstOutput S1, S2, S12; S12.resize(1,0); @@ -32,10 +32,10 @@ EstOutput Jest::getValues(SharedRAbundVector* shared1, SharedRAbundVector* share SAbundVector* chaoS1Sabund = new SAbundVector(); SAbundVector* chaoS2Sabund = new SAbundVector(); - *chaoS1Sabund = shared1->getSAbundVector(); - *chaoS2Sabund = shared2->getSAbundVector(); + *chaoS1Sabund = shared[0]->getSAbundVector(); + *chaoS2Sabund = shared[1]->getSAbundVector(); - S12 = sharedChao->getValues(shared1, shared2); + S12 = sharedChao->getValues(shared); S1 = chaoS1->getValues(chaoS1Sabund); S2 = chaoS2->getValues(chaoS2Sabund); diff --git a/sharedjest.h b/sharedjest.h index fa1f613..8a943c9 100644 --- a/sharedjest.h +++ b/sharedjest.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class Jest : public Calculator { public: - Jest() : Calculator("Jest", 3) {}; + Jest() : Calculator("Jest", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedkstest.cpp b/sharedkstest.cpp index 1da7c90..fefe14f 100644 --- a/sharedkstest.cpp +++ b/sharedkstest.cpp @@ -11,28 +11,28 @@ /***********************************************************************/ -EstOutput KSTest::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2){ +EstOutput KSTest::getValues(vector shared){ try { data.resize(2,0); //Must return shared1 and shared2 to original order at conclusion of kstest - vector initData1 = shared1->getData(); - vector initData2 = shared2->getData(); - shared1->sortD(); - shared2->sortD(); + vector initData1 = shared[0]->getData(); + vector initData2 = shared[1]->getData(); + shared[0]->sortD(); + shared[1]->sortD(); - int numNZ1 = shared1->numNZ(); - int numNZ2 = shared2->numNZ(); - double numInd1 = (double)shared1->getNumSeqs(); - double numInd2 = (double)shared2->getNumSeqs(); + int numNZ1 = shared[0]->numNZ(); + int numNZ2 = shared[1]->numNZ(); + double numInd1 = (double)shared[0]->getNumSeqs(); + double numInd2 = (double)shared[1]->getNumSeqs(); double maxDiff = -1; double sum1 = 0; double sum2 = 0; - for(int i = 1; i < shared1->getNumBins(); i++) + for(int i = 1; i < shared[0]->getNumBins(); i++) { - sum1 += shared1->get(i).abundance; - sum2 += shared2->get(i).abundance; + sum1 += shared[0]->get(i).abundance; + sum2 += shared[1]->get(i).abundance; double diff = fabs((double)sum1/numInd1 - (double)sum2/numInd2); if(diff > maxDiff) maxDiff = diff; @@ -54,8 +54,8 @@ EstOutput KSTest::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sha cout << "If D-Statistic is greater than the critical value then the data sets are significantly different at the 95% confidence level.\n\n"; }*/ - shared1->setData(initData1); - shared2->setData(initData2); + shared[0]->setData(initData1); + shared[1]->setData(initData2); data[0] = DStatistic; data[1] = critVal; diff --git a/sharedkstest.h b/sharedkstest.h index 67c8525..6509b53 100644 --- a/sharedkstest.h +++ b/sharedkstest.h @@ -18,9 +18,9 @@ It is a child of the calculator class.*/ class KSTest : public Calculator { public: - KSTest() : Calculator("kstest", 3) {}; + KSTest() : Calculator("kstest", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedkulczynski.cpp b/sharedkulczynski.cpp index a4a20a7..5eadd16 100644 --- a/sharedkulczynski.cpp +++ b/sharedkulczynski.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput Kulczynski::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput Kulczynski::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; @@ -21,10 +21,10 @@ EstOutput Kulczynski::getValues(SharedRAbundVector* shared1, SharedRAbundVector* data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); if (tempA != 0) { S1++; } if (tempB != 0) { S2++; } diff --git a/sharedkulczynski.h b/sharedkulczynski.h index 574ddd5..aad69fe 100644 --- a/sharedkulczynski.h +++ b/sharedkulczynski.h @@ -17,9 +17,9 @@ class Kulczynski : public Calculator { public: - Kulczynski() : Calculator("Kulczynski", 1) {}; + Kulczynski() : Calculator("Kulczynski", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedkulczynskicody.cpp b/sharedkulczynskicody.cpp index 66f9d07..7ae6700 100644 --- a/sharedkulczynskicody.cpp +++ b/sharedkulczynskicody.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput KulczynskiCody::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput KulczynskiCody::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; @@ -21,10 +21,10 @@ EstOutput KulczynskiCody::getValues(SharedRAbundVector* shared1, SharedRAbundVec data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); if (tempA != 0) { S1++; } if (tempB != 0) { S2++; } diff --git a/sharedkulczynskicody.h b/sharedkulczynskicody.h index 20ac44e..b91dee9 100644 --- a/sharedkulczynskicody.h +++ b/sharedkulczynskicody.h @@ -18,9 +18,9 @@ class KulczynskiCody : public Calculator { public: - KulczynskiCody() : Calculator("KulczynskiCody", 1) {}; + KulczynskiCody() : Calculator("KulczynskiCody", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedlennon.cpp b/sharedlennon.cpp index b002a68..4846e20 100644 --- a/sharedlennon.cpp +++ b/sharedlennon.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput Lennon::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput Lennon::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB, min; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; min = 0; @@ -21,10 +21,10 @@ EstOutput Lennon::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sha data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); if (tempA != 0) { S1++; } if (tempB != 0) { S2++; } diff --git a/sharedlennon.h b/sharedlennon.h index 51bc9ed..0aba6a9 100644 --- a/sharedlennon.h +++ b/sharedlennon.h @@ -18,9 +18,9 @@ class Lennon : public Calculator { public: - Lennon() : Calculator("Lennon", 1) {}; + Lennon() : Calculator("Lennon", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedmorisitahorn.cpp b/sharedmorisitahorn.cpp index 8c5cf92..eeb01d3 100644 --- a/sharedmorisitahorn.cpp +++ b/sharedmorisitahorn.cpp @@ -10,7 +10,7 @@ #include "sharedmorisitahorn.h" /***********************************************************************/ -EstOutput MorHorn::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput MorHorn::getValues(vector shared) { try { data.resize(1,0); @@ -20,17 +20,17 @@ EstOutput MorHorn::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sh morhorn = 0.0; sumSharedA = 0.0; sumSharedB = 0.0; a = 0.0; b = 0.0; d = 0.0; //get the total values we need to calculate the theta denominator sums - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - Atotal += shared1->getAbundance(i); - Btotal += shared2->getAbundance(i); + Atotal += shared[0]->getAbundance(i); + Btotal += shared[1]->getAbundance(i); } //calculate the theta denominator sums - for (int j = 0; j < shared1->size(); j++) { + for (int j = 0; j < shared[0]->size(); j++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(j); - tempB = shared2->getAbundance(j); + tempA = shared[0]->getAbundance(j); + tempB = shared[1]->getAbundance(j); a += tempA * tempA; b += tempB * tempB; diff --git a/sharedmorisitahorn.h b/sharedmorisitahorn.h index dd01dc7..e771139 100644 --- a/sharedmorisitahorn.h +++ b/sharedmorisitahorn.h @@ -17,9 +17,9 @@ class MorHorn : public Calculator { public: - MorHorn() : Calculator("MorisitaHorn", 1) {}; + MorHorn() : Calculator("MorisitaHorn", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharednseqs.h b/sharednseqs.h index 6f38054..1809d97 100644 --- a/sharednseqs.h +++ b/sharednseqs.h @@ -16,11 +16,11 @@ class SharedNSeqs : public Calculator { public: - SharedNSeqs() : Calculator("SharedNSeqs", 1) {}; + SharedNSeqs() : Calculator("SharedNSeqs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ return data; }; - EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { + EstOutput getValues(vector shared) { data.resize(1,0); - data[0] = (double)shared1->getNumSeqs() + (double)shared2->getNumSeqs(); + data[0] = (double)shared[0]->getNumSeqs() + (double)shared[1]->getNumSeqs(); return data; } }; diff --git a/sharedochiai.cpp b/sharedochiai.cpp index a9677e5..744fb1c 100644 --- a/sharedochiai.cpp +++ b/sharedochiai.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput Ochiai::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput Ochiai::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; @@ -21,10 +21,10 @@ EstOutput Ochiai::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sha data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); if (tempA != 0) { S1++; } if (tempB != 0) { S2++; } diff --git a/sharedochiai.h b/sharedochiai.h index 26c6c58..61d9df3 100644 --- a/sharedochiai.h +++ b/sharedochiai.h @@ -16,9 +16,9 @@ class Ochiai : public Calculator { public: - Ochiai() : Calculator("Ochiai", 1) {}; + Ochiai() : Calculator("Ochiai", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedordervector.cpp b/sharedordervector.cpp index 0a6f095..2830cec 100644 --- a/sharedordervector.cpp +++ b/sharedordervector.cpp @@ -341,6 +341,7 @@ SharedSAbundVector SharedOrderVector::getSharedSAbundVector(string group) { /***********************************************************************/ SharedOrderVector SharedOrderVector::getSharedOrderVector(){ + random_shuffle(data.begin(), data.end()); return *this; } diff --git a/sharedsobs.cpp b/sharedsobs.cpp index 9c3f4dd..388356e 100644 --- a/sharedsobs.cpp +++ b/sharedsobs.cpp @@ -13,16 +13,16 @@ //This returns the number of unique species observed in several groups. //The shared vector is each groups sharedrabundvector. -EstOutput SharedSobs::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2){ +EstOutput SharedSobs::getValues(vector shared){ try { data.resize(1,0); int observed = 0; //loop through the species in each group - for (int k = 0; k < shared1->size(); k++) { + for (int k = 0; k < shared[0]->size(); k++) { //if you have found a new species - if (shared1->getAbundance(k) != 0) { observed++; } - else if ((shared1->getAbundance(k) == 0) && (shared2->getAbundance(k) != 0)) { observed++; } + if (shared[0]->getAbundance(k) != 0) { observed++; } + else if ((shared[0]->getAbundance(k) == 0) && (shared[1]->getAbundance(k) != 0)) { observed++; } } data[0] = observed; diff --git a/sharedsobs.h b/sharedsobs.h index b49119b..e71b6b8 100644 --- a/sharedsobs.h +++ b/sharedsobs.h @@ -19,9 +19,9 @@ It is a child of the calculator class. */ class SharedSobs : public Calculator { public: - SharedSobs() : Calculator("sharedsobs", 1) {}; + SharedSobs() : Calculator("sharedsobs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ return data; }; - EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2); + EstOutput getValues(vector); }; /***********************************************************************/ diff --git a/sharedsobscollectsummary.cpp b/sharedsobscollectsummary.cpp index d39e66e..6815d87 100644 --- a/sharedsobscollectsummary.cpp +++ b/sharedsobscollectsummary.cpp @@ -13,21 +13,21 @@ //This returns the number of shared species observed in several groups. //The shared vector is each groups sharedrabundvector. -EstOutput SharedSobsCS::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2){ +EstOutput SharedSobsCS::getValues(vector shared){ try { data.resize(1,0); int observed = 0; - int tempA, tempB; + int numGroups = shared.size(); - //loop through the species in each group - for (int k = 0; k < shared1->size(); k++) { - tempA = shared1->getAbundance(k); //store in temps to avoid calling getAbundance multiple times - tempB = shared2->getAbundance(k); - - //if you have found a new species - if ((tempA != 0) && (tempB != 0)) {//they are shared - observed++; + for (int i = 0; i < shared[0]->size(); i++) { + //get bin values and set sharedByAll + bool sharedByAll = true; + for (int j = 0; j < numGroups; j++) { + if (shared[j]->getAbundance(i) == 0) { sharedByAll = false; } } + + //they are shared + if (sharedByAll == true) { observed++; } } data[0] = observed; diff --git a/sharedsobscollectsummary.h b/sharedsobscollectsummary.h index 016fb6a..6c8e7c3 100644 --- a/sharedsobscollectsummary.h +++ b/sharedsobscollectsummary.h @@ -18,9 +18,9 @@ class SharedSobsCS : public Calculator { public: - SharedSobsCS() : Calculator("sharedsobs", 1) {}; + SharedSobsCS() : Calculator("sharedsobs", 1, true) {}; EstOutput getValues(SAbundVector* rank){ return data; }; - EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2); + EstOutput getValues(vector); }; /***********************************************************************/ diff --git a/sharedsorabund.cpp b/sharedsorabund.cpp index 36478de..6748bfe 100644 --- a/sharedsorabund.cpp +++ b/sharedsorabund.cpp @@ -11,13 +11,13 @@ /***********************************************************************/ -EstOutput SorAbund::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput SorAbund::getValues(vector shared) { try { EstOutput UVest; UVest.resize(2,0); data.resize(1,0); - UVest = uv->getUVest(shared1, shared2); + UVest = uv->getUVest(shared); //UVest[0] is Uest, UVest[1] is Vest data[0] = (2 * UVest[0] * UVest[1]) / ((float)(UVest[0] + UVest[1])); diff --git a/sharedsorabund.h b/sharedsorabund.h index 452471a..032dbda 100644 --- a/sharedsorabund.h +++ b/sharedsorabund.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class SorAbund : public Calculator { public: - SorAbund() : Calculator("SorAbund", 3) {}; + SorAbund() : Calculator("SorAbund", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: UVEst* uv; diff --git a/sharedsorclass.cpp b/sharedsorclass.cpp index 4fafe3e..f4757a8 100644 --- a/sharedsorclass.cpp +++ b/sharedsorclass.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ -EstOutput SorClass::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput SorClass::getValues(vector shared) { try { int S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; @@ -21,10 +21,10 @@ EstOutput SorClass::getValues(SharedRAbundVector* shared1, SharedRAbundVector* s data.resize(1,0); - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); //find number of bins in shared1 and shared2 if (tempA != 0) { S1++; } diff --git a/sharedsorclass.h b/sharedsorclass.h index f867b22..16fbed0 100644 --- a/sharedsorclass.h +++ b/sharedsorclass.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class SorClass : public Calculator { public: - SorClass() : Calculator("SorClass", 3) {}; + SorClass() : Calculator("SorClass", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedsorest.cpp b/sharedsorest.cpp index 68048b1..429308f 100644 --- a/sharedsorest.cpp +++ b/sharedsorest.cpp @@ -13,7 +13,7 @@ /***********************************************************************/ -EstOutput SorEst::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput SorEst::getValues(vector shared) { try { EstOutput S1, S2, S12; S12.resize(1,0); @@ -31,10 +31,10 @@ EstOutput SorEst::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sha SAbundVector* chaoS1Sabund = new SAbundVector(); SAbundVector* chaoS2Sabund = new SAbundVector(); - *chaoS1Sabund = shared1->getSAbundVector(); - *chaoS2Sabund = shared2->getSAbundVector(); + *chaoS1Sabund = shared[0]->getSAbundVector(); + *chaoS2Sabund = shared[1]->getSAbundVector(); - S12 = sharedChao->getValues(shared1, shared2); + S12 = sharedChao->getValues(shared); S1 = chaoS1->getValues(chaoS1Sabund); S2 = chaoS2->getValues(chaoS2Sabund); diff --git a/sharedsorest.h b/sharedsorest.h index 58de552..a38cbae 100644 --- a/sharedsorest.h +++ b/sharedsorest.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class SorEst : public Calculator { public: - SorEst() : Calculator("SorEst", 3) {}; + SorEst() : Calculator("SorEst", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedthetan.cpp b/sharedthetan.cpp index 853bf81..0d7405c 100644 --- a/sharedthetan.cpp +++ b/sharedthetan.cpp @@ -10,7 +10,7 @@ #include "sharedthetan.h" /***********************************************************************/ -EstOutput ThetaN::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput ThetaN::getValues(vector shared) { try { data.resize(1,0); @@ -20,17 +20,17 @@ EstOutput ThetaN::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sha numerator = 0.0; denominator = 0.0; thetaN = 0.0; sumSharedA = 0.0; sumSharedB = 0.0; a = 0.0; b = 0.0; d = 0.0; //get the total values we need to calculate the theta denominator sums - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - Atotal += shared1->getAbundance(i); - Btotal += shared2->getAbundance(i); + Atotal += shared[0]->getAbundance(i); + Btotal += shared[1]->getAbundance(i); } //calculate the theta denominator sums - for (int j = 0; j < shared1->size(); j++) { + for (int j = 0; j < shared[0]->size(); j++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(j); - tempB = shared2->getAbundance(j); + tempA = shared[0]->getAbundance(j); + tempB = shared[1]->getAbundance(j); //they are shared if ((tempA != 0) && (tempB != 0)) { diff --git a/sharedthetan.h b/sharedthetan.h index 459868e..48e816d 100644 --- a/sharedthetan.h +++ b/sharedthetan.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class ThetaN : public Calculator { public: - ThetaN() : Calculator("ThetaN", 3) {}; + ThetaN() : Calculator("ThetaN", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/sharedthetayc.cpp b/sharedthetayc.cpp index 2567110..59cea4d 100644 --- a/sharedthetayc.cpp +++ b/sharedthetayc.cpp @@ -10,7 +10,7 @@ #include "sharedthetayc.h" /***********************************************************************/ -EstOutput ThetaYC::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput ThetaYC::getValues(vector shared) { try { data.resize(1,0); @@ -23,17 +23,17 @@ EstOutput ThetaYC::getValues(SharedRAbundVector* shared1, SharedRAbundVector* sh float b = 0; //get the total values we need to calculate the theta denominator sums - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - Atotal += shared1->getAbundance(i); - Btotal += shared2->getAbundance(i); + Atotal += shared[0]->getAbundance(i); + Btotal += shared[1]->getAbundance(i); } //calculate the theta denominator sums - for (int j = 0; j < shared1->size(); j++) { + for (int j = 0; j < shared[0]->size(); j++) { //store in temps to avoid multiple repetitive function calls - relA = shared1->getAbundance(j) / (float)Atotal; - relB = shared2->getAbundance(j) / (float)Btotal; + relA = shared[0]->getAbundance(j) / (float)Atotal; + relB = shared[1]->getAbundance(j) / (float)Btotal; a += relA * relB; b += pow((relA-relB),2); diff --git a/sharedthetayc.h b/sharedthetayc.h index b17d3c4..0d9f4a0 100644 --- a/sharedthetayc.h +++ b/sharedthetayc.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class ThetaYC : public Calculator { public: - ThetaYC() : Calculator("ThetaYC", 3) {}; + ThetaYC() : Calculator("ThetaYC", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); private: }; diff --git a/simpson.h b/simpson.h index 66f4c13..699a5d5 100644 --- a/simpson.h +++ b/simpson.h @@ -20,9 +20,9 @@ It is a child of the calculator class. */ class Simpson : public Calculator { public: - Simpson() : Calculator("Simpson", 3) {}; + Simpson() : Calculator("Simpson", 3, false) {}; EstOutput getValues(SAbundVector*); - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*) {return data;}; + EstOutput getValues(vector) {return data;}; }; /***********************************************************************/ diff --git a/sobs.h b/sobs.h index 528289e..2259244 100644 --- a/sobs.h +++ b/sobs.h @@ -21,13 +21,13 @@ It is a child of the calculator class. */ class Sobs : public Calculator { public: - Sobs() : Calculator("sobs", 1) {}; + Sobs() : Calculator("sobs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ data.resize(1,0); data[0] = (double)rank->getNumBins(); return data; } - EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2) {return data;}; + EstOutput getValues(vector) {return data;}; }; /***********************************************************************/ diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index eca2033..b3e6650 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -41,6 +41,7 @@ SummarySharedCommand::SummarySharedCommand(){ format = globaldata->getFormat(); validCalculator = new ValidCalculators(); util = new SharedUtil(); + mult = false; int i; for (i=0; iEstimators.size(); i++) { @@ -88,7 +89,6 @@ SummarySharedCommand::SummarySharedCommand(){ }else if (globaldata->Estimators[i] == "whittaker") { sumCalculators.push_back(new Whittaker()); } - } } //reset calc for next command @@ -120,7 +120,13 @@ int SummarySharedCommand::execute(){ //if the users entered no valid calculators don't execute command if (sumCalculators.size() == 0) { return 0; } - + //check if any calcs can do multiples + else{ + for (int i = 0; i < sumCalculators.size(); i++) { + if (sumCalculators[i]->getMultiple() == true) { mult = true; } + } + } + if (format == "sharedfile") { read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); @@ -147,6 +153,20 @@ int SummarySharedCommand::execute(){ } outputFileHandle << endl; + //create file and put column headers for multiple groups file + if (mult = true) { + outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary"); + openOutputFile(outAllFileName, outAll); + + outAll << "label" <<'\t' << "comparison" << '\t'; + for(int i=0;igetMultiple() == true) { + outAll << '\t' << sumCalculators[i]->getName(); + } + } + outAll << endl; + } + while(order != NULL){ if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ @@ -154,15 +174,40 @@ int SummarySharedCommand::execute(){ cout << order->getLabel() << '\t' << count << endl; util->getSharedVectors(globaldata->Groups, lookup, order); //fills group vectors from order vector. //fills group vectors from order vector. - //randomize group order - if (globaldata->getJumble() == "1") { random_shuffle(lookup.begin(), lookup.end()); } + //loop through calculators and add to file all for all calcs that can do mutiple groups + if (mult = true) { + //output label + outAll << order->getLabel() << '\t'; + + //output groups names + string outNames = ""; + for (int j = 0; j < lookup.size(); j++) { + outNames += lookup[j]->getGroup() + "-"; + } + outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-'; + outAll << outNames << '\t'; + + for(int i=0;igetMultiple() == true) { + sumCalculators[i]->getValues(lookup); + outAll << '\t'; + sumCalculators[i]->print(outAll); + } + } + outAll << endl; + } int n = 1; + vector subset; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { outputFileHandle << order->getLabel() << '\t'; + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(lookup[k]); subset.push_back(lookup[l]); + //sort groups to be alphanumeric if (lookup[k]->getGroup() > lookup[l]->getGroup()) { outputFileHandle << (lookup[l]->getGroup() +'\t' + lookup[k]->getGroup()) << '\t'; //print out groups @@ -170,8 +215,8 @@ int SummarySharedCommand::execute(){ outputFileHandle << (lookup[k]->getGroup() +'\t' + lookup[l]->getGroup()) << '\t'; //print out groups } - for(int i=0;igetValues(lookup[k], lookup[l]); //saves the calculator outputs + for(int i=0;igetValues(subset); //saves the calculator outputs outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); } diff --git a/summarysharedcommand.h b/summarysharedcommand.h index e87e75e..ea141d1 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -53,8 +53,9 @@ private: SharedListVector* SharedList; SharedOrderVector* order; vector lookup; - string outputFileName, format; - ofstream outputFileHandle; + string outputFileName, format, outAllFileName; + ofstream outputFileHandle, outAll; + bool mult; }; diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index e50fbaa..de30d3c 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -83,7 +83,8 @@ int TreeGroupCommand::execute(){ try { int count = 1; EstOutput data; - + vector subset; + //if the users entered no valid calculators don't execute command if (treeCalculators.size() == 0) { return 0; } @@ -150,7 +151,12 @@ int TreeGroupCommand::execute(){ for (int l = k; l < lookup.size(); l++) { if (k != l) { //we dont need to similiarity of a groups to itself //get estimated similarity between 2 groups - data = treeCalculators[i]->getValues(lookup[k], lookup[l]); //saves the calculator outputs + + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(lookup[k]); subset.push_back(lookup[l]); + + data = treeCalculators[i]->getValues(subset); //saves the calculator outputs //save values in similarity matrix simMatrix[k][l] = data[0]; simMatrix[l][k] = data[0]; diff --git a/uvest.cpp b/uvest.cpp index 725bcd1..819be0a 100644 --- a/uvest.cpp +++ b/uvest.cpp @@ -11,7 +11,7 @@ /***********************************************************************/ //This is used by SharedJAbund and SharedSorAbund -EstOutput UVEst::getUVest(SharedRAbundVector* shared1, SharedRAbundVector* shared2) { +EstOutput UVEst::getUVest(vector shared) { try { EstOutput results; results.resize(2,0); @@ -29,10 +29,10 @@ EstOutput UVEst::getUVest(SharedRAbundVector* shared1, SharedRAbundVector* share sumSharedA1 = the sum of all shared otus in A where B = 1 sumSharedB1 = the sum of all shared otus in B where A = 1 */ - for (int i = 0; i < shared1->size(); i++) { + for (int i = 0; i < shared[0]->size(); i++) { //store in temps to avoid multiple repetitive function calls - tempA = shared1->getAbundance(i); - tempB = shared2->getAbundance(i); + tempA = shared[0]->getAbundance(i); + tempB = shared[1]->getAbundance(i); Atotal += tempA; Btotal += tempB; diff --git a/uvest.h b/uvest.h index 3eea438..4f86fed 100644 --- a/uvest.h +++ b/uvest.h @@ -23,7 +23,7 @@ typedef vector EstOutput; /***********************************************************************/ class UVEst { public: - EstOutput getUVest(SharedRAbundVector* shared1, SharedRAbundVector* shared2); + EstOutput getUVest(vector); }; /***********************************************************************/ diff --git a/validcommands.cpp b/validcommands.cpp index 696214e..94845c5 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -38,6 +38,7 @@ ValidCommands::ValidCommands() { commands["get.label"] = "get.label"; commands["get.line"] = "get.line"; commands["bootstrap.shared"] = "bootstrap.shared"; + commands["concensus"] = "concensus"; commands["help"] = "help"; commands["quit"] = "quit"; diff --git a/validparameter.cpp b/validparameter.cpp index 3fbf307..4234d2d 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -247,7 +247,7 @@ void ValidParameters::initCommandParameters() { string collectsingleArray[] = {"freq","line","label","calc","abund"}; commandParameters["collect.single"] = addParameters(collectsingleArray, sizeof(collectsingleArray)/sizeof(string)); - string collectsharedArray[] = {"jumble","freq","line","label","calc","groups"}; + string collectsharedArray[] = {"freq","line","label","calc","groups"}; commandParameters["collect.shared"] = addParameters(collectsharedArray, sizeof(collectsharedArray)/sizeof(string)); string getgroupArray[] = {}; @@ -271,7 +271,7 @@ void ValidParameters::initCommandParameters() { string summarysingleArray[] = {"line","label","calc","abund"}; commandParameters["summary.single"] = addParameters(summarysingleArray, sizeof(summarysingleArray)/sizeof(string)); - string summarysharedArray[] = {"jumble","line","label","calc","groups"}; + string summarysharedArray[] = {"line","label","calc","groups"}; commandParameters["summary.shared"] = addParameters(summarysharedArray, sizeof(summarysharedArray)/sizeof(string)); string parsimonyArray[] = {"random","groups","iters"}; @@ -301,6 +301,9 @@ void ValidParameters::initCommandParameters() { string bootstrapArray[] = {"line","label","calc","groups","iters"}; commandParameters["bootstrap.shared"] = addParameters(bootstrapArray, sizeof(bootstrapArray)/sizeof(string)); + string concensusArray[] = {}; + commandParameters["concensus"] = addParameters(concensusArray, sizeof(concensusArray)/sizeof(string)); + string quitArray[] = {}; commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string)); diff --git a/venn.cpp b/venn.cpp index 7c33dd7..3c719ec 100644 --- a/venn.cpp +++ b/venn.cpp @@ -69,8 +69,7 @@ void Venn::getPic(SAbundVector* sabund, vector vCalcs) { void Venn::getPic(vector lookup, vector vCalcs) { try { - //fills vector of sharedsabunds - lookup - //util->getSharedVectors(globaldata->Groups, lookup, sharedorder); //fills group vectors from order vector. + vector subset; /******************* 1 Group **************************/ if (lookup.size() == 1) { @@ -125,13 +124,16 @@ void Venn::getPic(vector lookup, vector vCalcs sabundA = new SAbundVector(lookup[0]->getSAbundVector());// sabundA = &sA; sabundB = new SAbundVector(lookup[1]->getSAbundVector());// sabundB = &sB; + subset.clear(); + subset.push_back(lookup[0]); subset.push_back(lookup[1]); + //make a file for each calculator for(int i=0;iinputFileName) + lookup[0]->getLabel() + ".venn." + vCalcs[i]->getName() + ".svg"; openOutputFile(filenamesvg, outsvg); //get estimates for sharedAB - vector shared = vCalcs[i]->getValues(lookup[0], lookup[1]); + vector shared = vCalcs[i]->getValues(subset); //in essence you want to run it like a single if (vCalcs[i]->getName() == "sharedsobs") { @@ -199,11 +201,19 @@ void Venn::getPic(vector lookup, vector vCalcs for(int i=0;iinputFileName) + lookup[0]->getLabel() + ".venn." + vCalcs[i]->getName() + ".svg"; openOutputFile(filenamesvg, outsvg); - + //get estimates for sharedAB, sharedAC and sharedBC - vector sharedAB = vCalcs[i]->getValues(lookup[0], lookup[1]); - vector sharedAC = vCalcs[i]->getValues(lookup[0], lookup[2]); - vector sharedBC = vCalcs[i]->getValues(lookup[1], lookup[2]); + subset.clear(); + subset.push_back(lookup[0]); subset.push_back(lookup[1]); + vector sharedAB = vCalcs[i]->getValues(subset); + + subset.clear(); + subset.push_back(lookup[0]); subset.push_back(lookup[2]); + vector sharedAC = vCalcs[i]->getValues(subset); + + subset.clear(); + subset.push_back(lookup[1]); subset.push_back(lookup[2]); + vector sharedBC = vCalcs[i]->getValues(subset); //merge BC and estimate with shared with A SharedRAbundVector* merge = new SharedRAbundVector(); @@ -211,7 +221,9 @@ void Venn::getPic(vector lookup, vector vCalcs merge->push_back((lookup[1]->getAbundance(j) + lookup[2]->getAbundance(j)), j, ""); } - vector sharedAwithBC = vCalcs[i]->getValues(lookup[0], merge); + subset.clear(); + subset.push_back(lookup[0]); subset.push_back(merge); + vector sharedAwithBC = vCalcs[i]->getValues(subset); delete merge; //merge AC and estimate with shared with B @@ -219,8 +231,10 @@ void Venn::getPic(vector lookup, vector vCalcs for (int j = 0; j < lookup[0]->size(); j++) { merge->push_back((lookup[0]->getAbundance(j) + lookup[2]->getAbundance(j)), j, ""); } - - vector sharedBwithAC = vCalcs[i]->getValues(lookup[1], merge); + + subset.clear(); + subset.push_back(merge); subset.push_back(lookup[1]); + vector sharedBwithAC = vCalcs[i]->getValues(subset); delete merge; //merge AB and estimate with shared with C @@ -229,7 +243,9 @@ void Venn::getPic(vector lookup, vector vCalcs merge->push_back((lookup[0]->getAbundance(j) + lookup[1]->getAbundance(j)), j, ""); } - vector sharedCwithAB = vCalcs[i]->getValues(lookup[2], merge); + subset.clear(); + subset.push_back(lookup[2]); subset.push_back(merge); + vector sharedCwithAB = vCalcs[i]->getValues(subset); delete merge; //in essence you want to run it like a single diff --git a/whittaker.cpp b/whittaker.cpp index 95f1c9c..3b17911 100644 --- a/whittaker.cpp +++ b/whittaker.cpp @@ -11,16 +11,16 @@ /***********************************************************************/ -EstOutput Whittaker::getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2){ +EstOutput Whittaker::getValues(vector shared){ try{ data.resize(1); int countA = 0; int countB = 0; - int sTotal = shared1->getNumBins(); + int sTotal = shared[0]->getNumBins(); for(int i=0;igetAbundance(i) != 0){ countA++; } - if(shared2->getAbundance(i) != 0){ countB++; } + if(shared[0]->getAbundance(i) != 0){ countA++; } + if(shared[1]->getAbundance(i) != 0){ countB++; } } data[0] = 2*sTotal/(float)(countA+countB)-1; diff --git a/whittaker.h b/whittaker.h index 22bf788..bcd72fc 100644 --- a/whittaker.h +++ b/whittaker.h @@ -18,9 +18,9 @@ class Whittaker : public Calculator { public: - Whittaker() : Calculator("whittaker", 3) {}; + Whittaker() : Calculator("whittaker", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(SharedRAbundVector*, SharedRAbundVector*); + EstOutput getValues(vector); };